Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pgstrcasecmp.c
4 : : * Portable SQL-like case-independent comparisons and conversions.
5 : : *
6 : : * SQL99 specifies Unicode-aware case normalization, which we don't yet
7 : : * have the infrastructure for. Instead we use tolower() to provide a
8 : : * locale-aware translation. However, there are some locales where this
9 : : * is not right either (eg, Turkish may do strange things with 'i' and
10 : : * 'I'). Our current compromise is to use tolower() for characters with
11 : : * the high bit set, and use an ASCII-only downcasing for 7-bit
12 : : * characters.
13 : : *
14 : : * NB: this code should match downcase_truncate_identifier() in scansup.c.
15 : : *
16 : : *
17 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
18 : : *
19 : : * src/port/pgstrcasecmp.c
20 : : *
21 : : *-------------------------------------------------------------------------
22 : : */
23 : : #include "c.h"
24 : :
25 : : #include <ctype.h>
26 : :
27 : :
28 : : /*
29 : : * Case-independent comparison of two null-terminated strings.
30 : : */
31 : : int
7893 tgl@sss.pgh.pa.us 32 :CBC 11483040 : pg_strcasecmp(const char *s1, const char *s2)
33 : : {
34 : : for (;;)
35 : 5174686 : {
7779 bruce@momjian.us 36 : 16657726 : unsigned char ch1 = (unsigned char) *s1++;
37 : 16657726 : unsigned char ch2 = (unsigned char) *s2++;
38 : :
7893 tgl@sss.pgh.pa.us 39 [ + + ]: 16657726 : if (ch1 != ch2)
40 : : {
41 [ + + + + ]: 11453142 : if (ch1 >= 'A' && ch1 <= 'Z')
42 : 4680587 : ch1 += 'a' - 'A';
7296 bruce@momjian.us 43 [ + + - + ]: 6772555 : else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
7893 tgl@sss.pgh.pa.us 44 :UBC 0 : ch1 = tolower(ch1);
45 : :
7893 tgl@sss.pgh.pa.us 46 [ + + + + ]:CBC 11453142 : if (ch2 >= 'A' && ch2 <= 'Z')
47 : 2842754 : ch2 += 'a' - 'A';
7296 bruce@momjian.us 48 [ - + - - ]: 8610388 : else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
7893 tgl@sss.pgh.pa.us 49 :UBC 0 : ch2 = tolower(ch2);
50 : :
7893 tgl@sss.pgh.pa.us 51 [ + + ]:CBC 11453142 : if (ch1 != ch2)
52 : 10449065 : return (int) ch1 - (int) ch2;
53 : : }
54 [ + + ]: 6208661 : if (ch1 == 0)
55 : 1033975 : break;
56 : : }
57 : 1033975 : return 0;
58 : : }
59 : :
60 : : /*
61 : : * Case-independent comparison of two not-necessarily-null-terminated strings.
62 : : * At most n bytes will be examined from each string.
63 : : */
64 : : int
65 : 5591785 : pg_strncasecmp(const char *s1, const char *s2, size_t n)
66 : : {
67 [ + + ]: 8059910 : while (n-- > 0)
68 : : {
7779 bruce@momjian.us 69 : 6241314 : unsigned char ch1 = (unsigned char) *s1++;
70 : 6241314 : unsigned char ch2 = (unsigned char) *s2++;
71 : :
7893 tgl@sss.pgh.pa.us 72 [ + + ]: 6241314 : if (ch1 != ch2)
73 : : {
74 [ + + + + ]: 3864029 : if (ch1 >= 'A' && ch1 <= 'Z')
75 : 2721058 : ch1 += 'a' - 'A';
7296 bruce@momjian.us 76 [ - + - - ]: 1142971 : else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
7893 tgl@sss.pgh.pa.us 77 :UBC 0 : ch1 = tolower(ch1);
78 : :
7893 tgl@sss.pgh.pa.us 79 [ + + + + ]:CBC 3864029 : if (ch2 >= 'A' && ch2 <= 'Z')
80 : 153262 : ch2 += 'a' - 'A';
7296 bruce@momjian.us 81 [ - + - - ]: 3710767 : else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
7893 tgl@sss.pgh.pa.us 82 :UBC 0 : ch2 = tolower(ch2);
83 : :
7893 tgl@sss.pgh.pa.us 84 [ + + ]:CBC 3864029 : if (ch1 != ch2)
85 : 3773189 : return (int) ch1 - (int) ch2;
86 : : }
87 [ - + ]: 2468125 : if (ch1 == 0)
7893 tgl@sss.pgh.pa.us 88 :UBC 0 : break;
89 : : }
7893 tgl@sss.pgh.pa.us 90 :CBC 1818596 : return 0;
91 : : }
92 : :
93 : : /*
94 : : * Fold a character to upper case.
95 : : *
96 : : * Unlike some versions of toupper(), this is safe to apply to characters
97 : : * that aren't lower case letters. Note however that the whole thing is
98 : : * a bit bogus for multibyte character sets.
99 : : */
100 : : unsigned char
101 : 165205 : pg_toupper(unsigned char ch)
102 : : {
103 [ + + + - ]: 165205 : if (ch >= 'a' && ch <= 'z')
104 : 97534 : ch += 'A' - 'a';
7296 bruce@momjian.us 105 [ - + - - ]: 67671 : else if (IS_HIGHBIT_SET(ch) && islower(ch))
7893 tgl@sss.pgh.pa.us 106 :UBC 0 : ch = toupper(ch);
7893 tgl@sss.pgh.pa.us 107 :CBC 165205 : return ch;
108 : : }
109 : :
110 : : /*
111 : : * Fold a character to lower case.
112 : : *
113 : : * Unlike some versions of tolower(), this is safe to apply to characters
114 : : * that aren't upper case letters. Note however that the whole thing is
115 : : * a bit bogus for multibyte character sets.
116 : : */
117 : : unsigned char
118 : 7379837 : pg_tolower(unsigned char ch)
119 : : {
120 [ + + + + ]: 7379837 : if (ch >= 'A' && ch <= 'Z')
121 : 4772431 : ch += 'a' - 'A';
7296 bruce@momjian.us 122 [ - + - - ]: 2607406 : else if (IS_HIGHBIT_SET(ch) && isupper(ch))
7893 tgl@sss.pgh.pa.us 123 :UBC 0 : ch = tolower(ch);
7893 tgl@sss.pgh.pa.us 124 :CBC 7379837 : return ch;
125 : : }
|