This page was inspired by a question posed in comp.lang.c. What is the best way to find a 2 character pattern in an unsigned character array of known length? The array can contain the character '\0', so its not a string.
The obvious solution is to just walk the array looking for the two characters as shown below:
int
try_1(
unsigned len,
unsigned char str[],
unsigned char pat1,
unsigned char pat2
)
{
unsigned i;
if( len <= 1 ) {
return 0;
}
for( i = 0; i < len - 1; i++ ) {
if( str[i] == pat1 && str[i+1] == pat2 ) {
return 1;
}
}
return 0;
}
The code above loads characters twice; once as str[i+1]
and then as str[i]
. An obvious optimization is to avoid loading the same character twice.
int
try_2(
unsigned len,
unsigned char str[],
unsigned char pat1,
unsigned char pat2
)
{
int i;
int c;
if( len <= 1 ) {
return 0;
}
i = 0;
while( 1 ) {
/* can't generate a match after first */
if( i == len - 1 ) {
return 0;
}
c = str[i];
i++;
match_first:;
if( c == pat1 ) {
c = str[i];
i++;
if( c == pat2 ) {
return 1;
}
if( i == len ) {
return 0;
}
goto match_first;
}
}
}
The code above has two stopping conditions:
The two stopping conditions can be reduced to one by adding the pattern at the end of the existing array. This way the pattern is guaranteed to be found, so the end of array test is not needed. However, after the pattern is found, we have to test whether the pattern was the one we added.
int
try_3(
unsigned len,
unsigned char str[],
unsigned char pat1,
unsigned char pat2
)
{
int i;
int c;
str[len] = pat1;
str[len+1] = pat2;
i = 0;
while( 1 ) {
c = str[i];
i++;
match_first:;
if( c == pat1 ) {
c = str[i];
i++;
if( c == pat2 ) {
goto found_match;
}
goto match_first;
}
}
found_match:;
return i <= len;
}
One problem with this code is that it is not always possible to write beyond the end of the array. In that case, we can still use the same idea, by writing the pattern over the last two characters of the existing array. In that case we have to do extra work to save and restore those last two characters.
int
try_4(
unsigned len,
unsigned char str[],
unsigned char pat1,
unsigned char pat2
)
{
int i;
int c;
int old_1;
int old_2;
if( len <= 1 ) {
return 0;
}
old_1 = str[len-2];
old_2 = str[len-1];
if( old_1 == pat1 && old_2 == pat2 ) {
return 1;
}
if( len == 2 ) {
return 0;
}
if( old_1 == pat2 && str[len-3] == pat1 ) {
return 1;
}
if( len == 3 ) {
return 0;
}
str[len-2] = pat1;
str[len-1] = pat2;
i = 0;
while( 1 ) {
c = str[i];
i++;
match_first:;
if( c == pat1 ) {
c = str[i];
i++;
if( c == pat2 ) {
goto found_match;
}
goto match_first;
}
}
found_match:;
str[len-2] = old_1;
str[len-1] = old_2;
return i <= len-1;
}
The codes above operates read one character at a time from memory. However, most processors efficiently load multiple bytes from memory, and operate on multiple bytes at a time. We can exploit this ability to produce code, that on many high-end modern processors, should run faster than the character-at-a-time codes shown above.
Before we get into the following code, a warning: this code is non-portable for at least 2 reasons.
unsigned
to be 32 bits and unsigned char
to be 8 bits. For processors that support 64 bits, we should rewrite the code to use a 64-bit unsigned type. For processors that support only 16 bits efficiently, the following code is probably much slower than the character-at-a-time codes above.
unsigned
from arbitrary alignments. Some processors do not support this at all, and others read non-aligned data very slowly.
int
try_5(
unsigned len,
unsigned char str[],
unsigned char pat1,
unsigned char pat2
)
{
unsigned len_4 = (len-1)>>2;
unsigned mask1 = (pat1<<24)|(pat1<<16)|(pat1<<8)|(pat1);
unsigned mask2 = (pat2<<24)|(pat2<<16)|(pat2<<8)|(pat2);
unsigned ones = 0x7f7f7f7f;
unsigned compl = ~ones;
unsigned * p1 = (unsigned *)str;
unsigned * p2 = (unsigned *)(str+1);
unsigned w1;
unsigned w2;
unsigned v1;
unsigned v2;
unsigned i;
if( len <= 1 ) {
return 0;
}
for( i = 0; i < len_4; i++ ) {
w1 = p1[i];
w1 ^= mask1;
v1 = w1&ones;
v1 += ones;
v1 |= w1;
v1 &= compl;
if( v1 != compl ) {
w2 = p2[i];
w2 ^= mask2;
v2 = w2&ones;
v2 += ones;
v2 |= w2;
v1 |= v2;
v1 &= compl;
if( v1 != compl ) {
return 1;
}
}
}
for( i = len_4*4; i < len-1; i++ ) {
if( str[i] == pat1 && str[i+1] == pat2 ) {
return 1;
}
}
return 0;
}
To understand how this code works, consider the following problem: given an octet, without using compares, return 0x00
if the given octet is 0 otherwise return 0x80
.
0x7f
.
0x80
and 0xfe
; the high-order bit is guaranteed to be 1.
We have glossed over one issue - what happens if we add 0x7f to an octet between 0x81 and 0xff? In that case the sum overflows, producing a number of greater than or equal to 0x100. This is not an issue if we are only dealing with one octet. The high-order bit is 1 because of the or-ing with the original octet, not because of the sum.
Now, we can take this idea and extend it to detecting whether there is a 0 octet in a word. However we now have to worry about overflow; the overflow from one octet can affect our results. We suppress this by first masking each octet in the group with 0x7f. The following code checks for the presence of a 0 octet in a 4 octet quantity.
mask = var & 0x7f7f7f7f; /* suppress overflow */
sum = mask + 0x7f7f7f7f; /* set high order bit for +ve octets */
or = sum | var; /* set high order bit for -ve octets */
and = or & 0x8080808; /* remove all but high-order bits */
is_0 = and != 0x80808080; /* if one octet was 0, then value will
* be something like 0x80.00.80.80
*/
To search for a pattern instead of 0s, we xor each octet with the octet being searched for; if they are equal the result will be 0. For multiple octets, xor with the concatenation of the sequence.
To adapt the multi-byte approach for use on processors that do not efficiently support unaligned loads, the following modifications can be made:
p1
is on a word boundary.
All of the approaches above examine every octet. However, if the unsigned characters in the pattern are different then it may be more efficient to examine every other character.
int
try_6(
unsigned len,
unsigned char str[],
unsigned char pat1,
unsigned char pat2
)
{
unsigned i;
int hit;
if( len <= 1 ) {
return 0;
}
if( str[0] == pat1 && str[1] == pat2 ) {
return 1;
}
if( str[len-2] == pat1 && str[len-1] == pat2 ) {
return 1;
}
for( i = 1; i <<len-1; i+=2 ) {
if( str[i] == pat1 )
if( str[i+1] == pat2 ) {
return 1;
}
}
else if( str[i] == pat2 ) {
if( str[i-1] == pat1 ) {
return 1;
}
}
}
return 0;
}
If the pattern is constant (or rarely changes), we can replace the two comparisons in the loop with a table lookup.
void
setup_try_7(
unsigned char pat1,
unsigned char pat2,
unsigned char tbl[]
)
{
for( i = 0; i <<(1<<CHAR_BIT); i++ ) {
tbl[i] = 0;
}
tbl[pat1] = 1;
tbl[pat2] = 2;
}
int
try_7(
unsigned len,
unsigned char str[],
unsigned char pat1,
unsigned char pat2,
unsigned char tbl[]
)
{
unsigned i;
int hit;
if( len <= 1 ) {
return 0;
}
if( str[0] == pat1 && str[1] == pat2 ) {
return 1;
}
if( str[len-2] == pat1 && str[len-1] == pat2 ) {
return 1;
}
for( i = 1; i <<len-1; i+=2 ) {
hit = tbl[str[i]];
if( hit ) {
if( hit == 1 ) {
if( str[i+1] == pat2 ) {
return 1;
}
}
else {
if( str[i-1] == pat2 ) {
return 1;
}
}
}
}
return 0;
}