Rejoice!
I've found an easy way to pass that info. I'm working on the implementation and will post it to p5p when ready, but the idea is
to add an int, PL_dir_unicode, that will contain a set of unicode flags. Syscalls that are unaware of these hints will keep their default behavior, and new code will differentiate between bytes and unicode semantics.
New unicode semantics are proposed along with it; when compiled in, they have the following consequences: a) filename functions (such as open, stat, etc.) have a chance to behave differently when passed unicode scalars; b) after binmode(DIRHANDLE, ':utf8'), readdir will return unicode scalars, if appropriate. An interesting consequence is that even on unicode-unaware OSes, readdir() will also return unicode scalars, without touching any system-specific code -- which I think is really cool.
The prototype is:
/*
 * Bit flags stored in PL_dir_unicode.  Each flag occupies its own bit so
 * that several of them can be OR-ed together into one hint word.
 */
/* Tested by isDIR_RESULT_WANTED_AS_UTF8. */
#define DIRf_HINT_WANT_UTF8_RESULT 0x01
/* Set by PERLIO_UTF8_CONTEXT / PERLIO_UTF8_CONTEXT2; tested by isDIR_PARAM_UTF8. */
#define DIRf_HINT_PARAM1_IS_UTF8 0x02
/* Set by PERLIO_UTF8_CONTEXT2; tested by isDIR_PARAM2_UTF8. */
#define DIRf_HINT_PARAM2_IS_UTF8 0x04
/* Set by PERLIO_UTF8_CONTEXT_RETURN; makes PERLIO_UTF8_CHECK_RESULT flag the SV as UTF-8. */
#define DIRf_RESULT_IS_UTF8 0x08
/* Makes PERLIO_UTF8_CHECK_RESULT skip its is_utf8_string() probe. */
#define DIRf_RESULT_IS_BYTES 0x10
/*
 * Accessors for the PL_dir_unicode hint word.
 *
 * With UTF8_FILENAME_SEMANTICS defined:
 *   SET_DIR_UTF8_HINTS(flags)     stores `flags` in PL_dir_unicode.
 *   isDIR_RESULT_WANTED_AS_UTF8,
 *   isDIR_PARAM_UTF8,
 *   isDIR_PARAM2_UTF8             are non-zero when the matching DIRf_HINT_*
 *                                 bit is set in PL_dir_unicode.
 *   PERLIO_UTF8_CHECK_RESULT(sv)  turns the UTF-8 flag on for `sv` when
 *                                 DIRf_RESULT_IS_UTF8 is set; otherwise,
 *                                 unless DIRf_RESULT_IS_BYTES is set, it
 *                                 turns the flag on only if the string
 *                                 value of `sv` passes is_utf8_string().
 *                                 `sv` is evaluated more than once, so it
 *                                 must not have side effects.
 *
 * Without UTF8_FILENAME_SEMANTICS the setters expand to no-op statements
 * (their arguments are not evaluated) and the predicates are constant 0.
 */
#ifdef UTF8_FILENAME_SEMANTICS
#define SET_DIR_UTF8_HINTS(flags) (PL_dir_unicode = (flags))
#define isDIR_RESULT_WANTED_AS_UTF8 ((PL_dir_unicode) & DIRf_HINT_WANT_UTF8_RESULT)
#define isDIR_PARAM_UTF8 ((PL_dir_unicode) & DIRf_HINT_PARAM1_IS_UTF8)
#define isDIR_PARAM2_UTF8 ((PL_dir_unicode) & DIRf_HINT_PARAM2_IS_UTF8)
/* Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); no trailing backslash on the last line,
 * otherwise the following #else would be swallowed into the definition. */
#define PERLIO_UTF8_CHECK_RESULT(sv) \
    do { \
        if ((PL_dir_unicode) & DIRf_RESULT_IS_UTF8) { \
            SvUTF8_on((sv)); \
        } else if (!((PL_dir_unicode) & DIRf_RESULT_IS_BYTES)) { \
            STRLEN dir_len_; \
            const char * const dir_pv_ = SvPV((sv), dir_len_); \
            if (is_utf8_string((const U8 *)dir_pv_, dir_len_)) { \
                SvUTF8_on((sv)); \
            } \
        } \
    } while (0)
#else
#define SET_DIR_UTF8_HINTS(flags) ((void)0)
#define isDIR_RESULT_WANTED_AS_UTF8 0
#define isDIR_PARAM_UTF8 0
#define isDIR_PARAM2_UTF8 0
#define PERLIO_UTF8_CHECK_RESULT(sv) ((void)0)
#endif
/*
 * Convenience wrappers around SET_DIR_UTF8_HINTS().  Every wrapper
 * overwrites the whole hint word; none of them OR into the previous value.
 */

/* One-parameter call: mark parameter 1 as UTF-8 when `u1` is true. */
#define PERLIO_UTF8_CONTEXT(u1) \
    SET_DIR_UTF8_HINTS((u1) ? DIRf_HINT_PARAM1_IS_UTF8 : 0)

/* Two-parameter call: mark parameters 1 and 2 independently. */
#define PERLIO_UTF8_CONTEXT2(u1,u2) \
    SET_DIR_UTF8_HINTS( \
        ((u1) ? DIRf_HINT_PARAM1_IS_UTF8 : 0) | \
        ((u2) ? DIRf_HINT_PARAM2_IS_UTF8 : 0))

/* Same as PERLIO_UTF8_CONTEXT, taking the flag from SvUTF8(sv). */
#define PERLIO_UTF8_CONTEXT_FROM_SV(sv) \
    PERLIO_UTF8_CONTEXT(SvUTF8(sv))

/* Same as PERLIO_UTF8_CONTEXT2, taking the flags from SvUTF8() of each SV.
 * Must expand to the two-argument PERLIO_UTF8_CONTEXT2: the one-argument
 * PERLIO_UTF8_CONTEXT cannot accept two arguments. */
#define PERLIO_UTF8_CONTEXT_FROM_SV2(sv1,sv2) \
    PERLIO_UTF8_CONTEXT2(SvUTF8(sv1),SvUTF8(sv2))

/* Record whether the result is UTF-8 (`ret` true) for PERLIO_UTF8_CHECK_RESULT. */
#define PERLIO_UTF8_CONTEXT_RETURN(ret) \
    SET_DIR_UTF8_HINTS((ret) ? DIRf_RESULT_IS_UTF8 : 0)

/* Reset the hint word to 0. */
#define PERLIO_UTF8_CLEAR_CONTEXT \
    SET_DIR_UTF8_HINTS(0)
Caller code (pp_stat, for example):
/* Set the UTF-8 hint from sv's flag for the duration of the stat call ... */
PERLIO_UTF8_CONTEXT_FROM_SV(sv);
if (PL_op->op_type == OP_LSTAT)
    PL_laststatval = PerlLIO_lstat(SvPV_nolen_const(PL_statname), &PL_statcache);
else
    PL_laststatval = PerlLIO_stat(SvPV_nolen_const(PL_statname), &PL_statcache);
/* ... and reset it to 0 afterwards. */
PERLIO_UTF8_CLEAR_CONTEXT;
and the implementation of win32_stat in win32.c:
/* Take the wide-character path only when the caller flagged the filename
 * as UTF-8 and IsWin2000() is true. */
BOOL do_utf8 = isDIR_PARAM_UTF8 && IsWin2000();
...
if (do_utf8) {
    WCHAR buf[MAX_PATH+1];
    /* NOTE(review): MultiByteToWideChar returns 0 on failure, leaving buf
     * unspecified; the result is not checked before buf is used -- confirm
     * how failure should be reported here. */
    l = MultiByteToWideChar(CP_UTF8, 0, path, -1, buf, MAX_PATH+1);
    path = (char*) PerlDir_mapW(buf);
} else {
    path = PerlDir_mapA(path);
    /* NOTE(review): here l is a byte count without the terminator, while
     * in the branch above it is the converted length of buf, not of the
     * mapped path -- verify against the code that consumes l. */
    l = strlen(path);
}
...
res = do_utf8 ?
    wstat((WCHAR*) path, sbuf) :
    stat(path, sbuf);
This way the new functionality won't have any effect on platforms without utf8 filenames. Early criticism is welcome.