diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-17 05:40:02 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-17 05:40:02 +0200 |
commit | 6661224e66f03706daea8e27714436851cf01731 (patch) | |
tree | 97e80db55bb97c3e571afd25a14df95439f38d12 /include | |
parent | 79e06c4c4950be2abd8ca5d2428a8c915aa62c24 (diff) | |
parent | e2a58d2d3416aceeae63dfc7bf680dd390ff331d (diff) |
Merge tag 'unicode-for-next-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode
Pull unicode updates from Gabriel Krisman Bertazi:
"This includes patches from Christoph Hellwig to split the large data
tables of the unicode subsystem into a loadable module, which allow
users to not have them around if case-insensitive filesystems are not
to be used. It also includes minor code fixes to unicode and its
users, from the same author.
All the patches here have been on linux-next releases for the past
months"
* tag 'unicode-for-next-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode:
unicode: only export internal symbols for the selftests
unicode: Add utf8-data module
unicode: cache the normalization tables in struct unicode_map
unicode: move utf8cursor to utf8-selftest.c
unicode: simplify utf8len
unicode: remove the unused utf8{,n}age{min,max} functions
unicode: pass a UNICODE_AGE() tripple to utf8_load
unicode: mark the version field in struct unicode_map unsigned
unicode: remove the charset field from struct unicode_map
f2fs: simplify f2fs_sb_read_encoding
ext4: simplify ext4_sb_read_encoding
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/unicode.h | 49 |
1 files changed, 46 insertions, 3 deletions
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 74484d44c755..4d39e6e11a95 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -5,9 +5,52 @@
 #include <linux/init.h>
 #include <linux/dcache.h>
 
+struct utf8data;
+struct utf8data_table;
+
+#define UNICODE_MAJ_SHIFT		16
+#define UNICODE_MIN_SHIFT		8
+
+#define UNICODE_AGE(MAJ, MIN, REV)			\
+	(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) |	\
+	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |	\
+	 ((unsigned int)(REV)))
+
+static inline u8 unicode_major(unsigned int age)
+{
+	return (age >> UNICODE_MAJ_SHIFT) & 0xff;
+}
+
+static inline u8 unicode_minor(unsigned int age)
+{
+	return (age >> UNICODE_MIN_SHIFT) & 0xff;
+}
+
+static inline u8 unicode_rev(unsigned int age)
+{
+	return age & 0xff;
+}
+
+/*
+ * Two normalization forms are supported:
+ * 1) NFDI
+ *   - Apply unicode normalization form NFD.
+ *   - Remove any Default_Ignorable_Code_Point.
+ * 2) NFDICF
+ *   - Apply unicode normalization form NFD.
+ *   - Remove any Default_Ignorable_Code_Point.
+ *   - Apply a full casefold (C + F).
+ */
+enum utf8_normalization {
+	UTF8_NFDI = 0,
+	UTF8_NFDICF,
+	UTF8_NMAX,
+};
+
 struct unicode_map {
-	const char *charset;
-	int version;
+	unsigned int version;
+	const struct utf8data *ntab[UTF8_NMAX];
+	const struct utf8data_table *tables;
 };
 
 int utf8_validate(const struct unicode_map *um, const struct qstr *str);
@@ -30,7 +73,7 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
 int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
 		       struct qstr *str);
 
-struct unicode_map *utf8_load(const char *version);
+struct unicode_map *utf8_load(unsigned int version);
 void utf8_unload(struct unicode_map *um);
 
 #endif /* _LINUX_UNICODE_H */