Clucene C++编码转换问题
作者: 火车头 日期: 2008-02-28 05:04
在做 CLucene 与 Lucene 生成的 Index 文件相互兼容时,遇到了编码转换问题。对于非英文编码,二者的兼容性可能都会存在这样的问题。经过跟踪 CLucene 程序,发现它使用 Unicode 编码方式存储,因此要先把字符串或文件转换成 Unicode 编码,然后再进行其他处理。
转换的具体代码如下(Linux与vc6.0测试通过):
#ifndef _UNIX
/**
 * Translates a code-page name into a numeric code-page identifier for the
 * Windows conversion calls used by mb2wc()/wc2mb().
 *
 * A string made up solely of decimal digits (e.g. "936", "65001") is returned
 * as its numeric value. Anything else -- NULL, an empty string, a symbolic
 * name such as "GBK", or a value above 65535 -- falls back to 936 (GBK),
 * which is what this function previously returned for every input.
 *
 * @param code_page  code-page name, may be NULL
 * @return numeric code-page identifier
 */
static inline int codepage(const char* code_page)
{
    enum { DEFAULT_CODE_PAGE = 936 /* GBK */, MAX_CODE_PAGE = 65535 };

    if (!code_page || *code_page == '\0')
        return DEFAULT_CODE_PAGE;

    int value = 0;
    for (const char* p = code_page; *p != '\0'; ++p) {
        if (*p < '0' || *p > '9')
            return DEFAULT_CODE_PAGE;   /* symbolic name: keep legacy default */
        value = value * 10 + (*p - '0');
        if (value > MAX_CODE_PAGE)
            return DEFAULT_CODE_PAGE;   /* out of range; also guards overflow */
    }
    return value;
}
#endif
/**
 * Converts a multibyte string in the given code page to wide characters.
 *
 * @param code_page  source encoding: an iconv charset name when _UNIX is
 *                   defined, otherwise a name understood by codepage()
 * @param in         input bytes
 * @param in_len     number of input bytes, or a negative value if `in` is
 *                   NUL-terminated (the terminator is then converted as well)
 * @param out        destination buffer; ignored when out_max is 0
 * @param out_max    capacity of `out` in wchar_t units; 0 requests a sizing
 *                   query that only reports the required capacity
 * @return number of wchar_t written (or required, for a sizing query);
 *         0 on failure, including a destination buffer that is too small.
 *
 * The _UNIX branch is written to follow the same contract as the
 * MultiByteToWideChar call in the other branch, so callers can size and fill
 * buffers identically on both platforms.
 */
static inline int mb2wc(const char* code_page,/*in*/const char* in,int in_len,
/*out*/wchar_t* out,int out_max)
{
#ifdef _UNIX
    if (!code_page || !in || out_max < 0 || (out_max > 0 && !out))
        return 0;

    /* iconv works on byte counts held in size_t; never alias the int args. */
    size_t in_left;
    if (in_len < 0) {
        const char* end = in;
        while (*end != '\0')
            ++end;
        in_left = (size_t)(end - in) + 1;   /* include the terminator */
    } else {
        in_left = (size_t)in_len;
    }

    iconv_t env = iconv_open("WCHAR_T", code_page);
    if (env == (iconv_t)-1)
        return 0;

    char*  src      = (char*)in;            /* iconv takes char** but does not modify the input */
    size_t produced = 0;                    /* wchar_t units */
    int    ok       = 1;

    if (out_max == 0) {
        /* Sizing query: convert through a scratch buffer and count only. */
        wchar_t scratch[64];
        for (;;) {
            char*  dst      = (char*)scratch;
            size_t dst_left = sizeof(scratch);
            size_t before   = in_left;
            size_t rc       = iconv(env, &src, &in_left, &dst, &dst_left);
            produced += (sizeof(scratch) - dst_left) / sizeof(wchar_t);
            if (rc != (size_t)-1)
                break;
            /* A full scratch buffer always makes progress; no progress at all
               means the input itself is invalid or truncated. */
            if (in_left == before && dst_left == sizeof(scratch)) {
                ok = 0;
                break;
            }
        }
    } else {
        char*  dst      = (char*)out;
        size_t dst_left = (size_t)out_max * sizeof(wchar_t);
        if (iconv(env, &src, &in_left, &dst, &dst_left) == (size_t)-1)
            ok = 0;                         /* bad input or buffer too small */
        else
            produced = (size_t)out_max - dst_left / sizeof(wchar_t);
    }

    iconv_close(env);

    if (!ok || produced > (size_t)(~0u >> 1))
        return 0;
    return (int)produced;
#else
    return ::MultiByteToWideChar(codepage(code_page),0,in,in_len,out,out_max);
#endif
}
/**
 * Converts a wide-character string to a multibyte string in the given code
 * page.
 *
 * @param code_page  target encoding: an iconv charset name when _UNIX is
 *                   defined, otherwise a name understood by codepage()
 * @param in         input wide characters
 * @param in_len     number of input wchar_t, or a negative value if `in` is
 *                   NUL-terminated (the terminator is then converted as well)
 * @param out        destination buffer; ignored when out_max is 0
 * @param out_max    capacity of `out` in bytes; 0 requests a sizing query
 *                   that only reports the required capacity
 * @return number of bytes written (or required, for a sizing query);
 *         0 on failure, including a destination buffer that is too small.
 *
 * The _UNIX branch is written to follow the same contract as the
 * WideCharToMultiByte call in the other branch. It does not emit a final
 * reset sequence, so stateful target encodings are not fully handled.
 */
static inline int wc2mb(const char* code_page,/*in*/const wchar_t* in,int in_len,
/*out*/char* out,int out_max)
{
#ifdef _UNIX
    if (!code_page || !in || out_max < 0 || (out_max > 0 && !out))
        return 0;

    /* iconv works on byte counts held in size_t; never alias the int args. */
    size_t in_chars;
    if (in_len < 0) {
        const wchar_t* end = in;
        while (*end != L'\0')
            ++end;
        in_chars = (size_t)(end - in) + 1;  /* include the terminator */
    } else {
        in_chars = (size_t)in_len;
    }
    size_t in_left = in_chars * sizeof(wchar_t);

    iconv_t env = iconv_open(code_page, "WCHAR_T");
    if (env == (iconv_t)-1)
        return 0;

    char*  src      = (char*)in;            /* iconv takes char** but does not modify the input */
    size_t produced = 0;                    /* bytes */
    int    ok       = 1;

    if (out_max == 0) {
        /* Sizing query: convert through a scratch buffer and count only. */
        char scratch[256];
        for (;;) {
            char*  dst      = scratch;
            size_t dst_left = sizeof(scratch);
            size_t before   = in_left;
            size_t rc       = iconv(env, &src, &in_left, &dst, &dst_left);
            produced += sizeof(scratch) - dst_left;
            if (rc != (size_t)-1)
                break;
            /* A full scratch buffer always makes progress; no progress at all
               means the input cannot be represented or is truncated. */
            if (in_left == before && dst_left == sizeof(scratch)) {
                ok = 0;
                break;
            }
        }
    } else {
        char*  dst      = out;
        size_t dst_left = (size_t)out_max;
        if (iconv(env, &src, &in_left, &dst, &dst_left) == (size_t)-1)
            ok = 0;                         /* bad input or buffer too small */
        else
            produced = (size_t)out_max - dst_left;
    }

    iconv_close(env);

    if (!ok || produced > (size_t)(~0u >> 1))
        return 0;
    return (int)produced;
#else
    /* Honour in_len instead of always assuming a NUL-terminated input. */
    return ::WideCharToMultiByte(codepage(code_page),0,in,in_len,out,out_max, NULL, NULL);
#endif
}
void str_to_UnicodeChar(const char* strIn,TCHAR* &strOut){
if(!strIn)
return;
if(!strIn)
return;
int i= mb2wc("936",(char*)strIn, -1, NULL, 0);
strOut = (TCHAR*)malloc(sizeof(TCHAR)*i);
mb2wc("936",(char*)strIn, -1, strOut, i);
}
/**
 * Converts a NUL-terminated wide string into a newly allocated code page 936
 * multibyte string.
 *
 * @param strIn   input string; when NULL the function returns immediately and
 *                strOut is left untouched
 * @param strOut  receives a new[]-allocated, NUL-terminated buffer that the
 *                caller must release with delete[]; holds an empty string if
 *                the conversion fails
 */
void UnicodeChar_to_str(const TCHAR* strIn,char* &strOut){
    if(!strIn)
        return;

    /* First pass: ask how many bytes the converted text needs. */
    int needed = wc2mb("936", strIn, -1, NULL, 0);
    if (needed < 0)
        needed = 0;

    /* One spare byte so the result can always be terminated explicitly. */
    char* buffer = new char[needed + 1];

    /* Second pass: convert; on failure fall back to an empty string rather
       than terminating after uninitialised bytes. */
    int written = 0;
    if (needed > 0) {
        written = wc2mb("936", strIn, -1, buffer, needed);
        if (written < 0 || written > needed)
            written = 0;
    }
    buffer[written] = 0;
    strOut = buffer;
}
/**
 * Copies a NUL-terminated TCHAR string into a newly allocated char string by
 * narrowing each element to char. No encoding conversion is performed, so
 * elements that do not fit in a char lose information.
 *
 * @param strIn   input string; when NULL the function returns immediately and
 *                strOut is left untouched
 * @param strOut  receives a new[]-allocated, NUL-terminated buffer that the
 *                caller must release with delete[]
 *
 * The buffer holds at least 1024 bytes, as before, and grows for longer
 * inputs instead of being overrun.
 */
void tchar_to_str(const TCHAR* strIn ,char* &strOut){
    if(!strIn)
        return ;

    size_t length = 0;
    while (strIn[length])
        ++length;

    const size_t MIN_CAPACITY = 1024;   /* capacity the original always provided */
    size_t capacity = length + 1;
    if (capacity < MIN_CAPACITY)
        capacity = MIN_CAPACITY;

    char* buffer = new char[capacity];
    for (size_t k = 0; k < length; ++k)
        buffer[k] = (char)strIn[k];
    buffer[length] = '\0';
    strOut = buffer;
}
评论: 8 |
引用: 0 |
阅读: 2785
| Bullpen coach Derek canada goose kensington parka goose parka[/url] Jason Motte play catch. But Lilliquist never heard the Motte part.La Russa brought in moncler jackets for men Rzepczynski to moncler jackets for women realized there wasn’t a righthander warming up. He had to leave Rzepczynski to pitch to the dangerous Napoli, a righthanded hitter, and Napoli hit a game-winning two-run Moncler Clothing double. |
发表评论
订阅
上一篇
返回
下一篇
CHI Original Flat Iron,
chi hair dryer,
chi hair irons,
chi hair straighteners,
chi hair tools,
ceramic flat iron,
chi straightening iron,
chi hair straightening iron,