|
以下代码均在winxp+lazarus 1.0.14下测试通过
感谢欧文
测试程序如下:
wb单元:- //*************************************
- //汉字转五笔编码单元
- //作者:孙晓刚
- // 2013-12-24
- //=====================================
- unit uWuBi;
- {$mode objfpc}{$H+}
- interface
- uses
- Classes, SysUtils ,windows;
- //参数AType =0 86版本 ==1 98版 AStr 要转换的字符串 (GBK编码) ,返回ASCII 五笔编码
- function GetWuBi(AType:integer;AStr:string):string;
- implementation
- {$R WBSearch.res}
- Function SwapByte(AiChinese : String) : Integer;
- Var
- ch, cl : Integer;
- Begin
- ch := Ord(AiChinese[1]);
- cl := Ord(AiChinese[2]);
- Result := (ch shl 8) + cl;
- end;
- function IsChinese(Code: Integer): Boolean;
- Const
- SPOS=$8140;
- EPOS=$FEA0;
- begin
- Result := ((Code>=SPOS) And (Code<=EPOS))
- end;
- //0--1
- function InnerFind(FWBType:integer;Code: Integer): String;
- Const
- BaseCode=$8140;
- RESNAME:Array[0..1] of String=('WB86','WB98');
- var
- RS:TResourceStream;
- nPos:DWord;
- Keys:array[0..4] of Char;
- begin
- RS := TResourceStream.Create(Hinstance,RESNAME[FWBType],RT_RCDATA);
- IF RS.Size>0 then
- begin
- nPos := (Code-BaseCode)*5;
- RS.Seek(nPos,soBeginning);
- RS.Read(Keys,5);
- IF Keys[4]='' then
- Result := Format('%s%s%s%s',[Keys[0],Keys[1],Keys[2],Keys[3]])
- else Result := Format('%s%s%s%s %s',[Keys[0],Keys[1],Keys[2],Keys[3],Keys[4]]);
- RS.Free;
- end;
- end;
- function GetWuBi(AType:integer;AStr:string):string;
- var
- aChar:WideChar;
- wStr:WideString;
- Code,nLen:Integer;
- begin
- If Length(AStr)>0 then
- begin
- wStr := AStr;
- nLen := Length(wStr);
- aChar := wStr[nLen];
- Code := SwapByte(aChar);
- If IsChinese(Code) then
- result := UpperCase(InnerFind(AType,Code))
- else result := '';
- end
- else result := '';
- end;
- end.
复制代码 py单元 方式1:- //=============================================================================
- //汉语拼音单元
- //应用场景: 助记符,人名拼音等
- //开发环境: delphi 7
- // windows xp sp3
- //字符编码: GBK / CP936
- //功能: 将给出的中、英文字符串,查表得到其汉语拼音 以及 英文的 混合字符串
- // 中文可以得到其汉语拼音首字母串,或者全拼音字符串
- //限制: 没有实现音调,只限于 GBK 编码,没有支持 unicode, hz码等,
- // 修改代码,使其可以在任意版本delphi执行
- // 没有实现多音字自动使用正确的(如果要那样,就需要很大的词语库)
- //原始版本:某人写的 V4.1
- //修改:去类化,修改命名
- //编写:孙晓刚
- //日期: 2013-11-20
- //=============================================================================
- unit HanyuPinyin;
- {$IFDEF FPC}
- {$MODE Delphi}
- {$ENDIF}
- interface
- uses
- {$IFNDEF FPC}
- Windows,
- {$ELSE}
- {$ENDIF}
- Messages, SysUtils, Classes;
- //函数1:GetHanyuPinyinByChar
- //功能:获得一个非中文字符或者1个中文字符的汉语拼音全拼串
- //参数:AValue -- 1字节 英文/其他单字节字符
- // -- 2字节 中文,GBK/Cp936编码
- //返回:1个字符(1字节/2字节)的全拼字符串
- function GetHanyuPinyinByChar(AValue: pAnsiChar; Len: Integer): AnsiString;
- //函数2:getHanyuPinyinFull
- //功能:获得一个中英文混合字符串的各个字符的拼音码+英文混合字符串,是全拼音
- //参数:avalue -- 中英文混合字符串
- //返回:1个字符串的全拼字符串
- function getHanyuPinyinFull(AValue: AnsiString): AnsiString;
- //函数2:getHanyuPinyinShort
- //功能:获得一个中英文混合字符串的各个字符的拼音码+英文混合字符串,是全拼音
- //参数:avalue -- 中英文混合字符串
- //返回:1个字符串的拼音首字母串(英文取本身,希腊字母,罗马数字都转换成响应的英文表示)
- function GetHanyuPinyinShort(AValue: AnsiString): AnsiString;
- //这个实际上只用在内部,得到首个字节的字符
- //function GetFirstLetter(AValue: pAnsiChar; Len: Integer): AnsiString;
- implementation
- {$I HanyuPinyin.set}
- {$I HanyuPinyin.inc}
- function GetHanyuPinyinByChar(AValue: pAnsiChar; Len: Integer): AnsiString;
- var
- C: AnsiChar;
- Index: Integer;
- begin
- Result := '';
- if (Len > 1) and (AValue[0] >= #129) and (AValue[1] >= #64) then
- begin
- //是否为 GBK 字符
- case AValue[0] of
- #163: // 全角 ASCII
- begin
- C := AnsiChar(Ord(AValue[1]) - 128);
- if C in ['a'..'z', 'A'..'Z', '0'..'9', '(', ')', '[', ']'] then
- Result := C
- else
- Result := '';
- end;
- #162: // 罗马数字
- begin
- if AValue[1] > #160 then
- Result := CharIndex[Ord(AValue[1]) - 160]
- else
- Result := '';
- end;
- #166: // 希腊字母
- begin
- if AValue[1] in [#$A1..#$B8] then
- Result := CharIndex2[Ord(AValue[1]) - $A0]
- else if AValue[1] in [#$C1..#$D8] then
- Result := CharIndex2[Ord(AValue[1]) - $C0]
- else
- Result := '';
- end;
- else
- begin // 获得拼音索引
- Index := hanyu_pinyin_gbk_index[Ord(AValue[0]) - 128, Ord(AValue[1]) - 63];
- if Index = 0 then
- Result := ''
- else
- Result := hanyu_pinyin_index[Index];
- end;
- end;
- end
- else if Len > 0 then
- begin
- //在 GBK 字符集外, 即半角字符
- if AValue[0] in ['a'..'z', 'A'..'Z', '0'..'9'{$ifdef NEEDSymbols}, '(', ')', '[', ']',
- '.', '!', '@', '#', 'py单元2:
- [code]unit py;
- {$mode objfpc}{$H+}
- interface
- uses
- Classes, SysUtils;
- function GetHzPy(HzChar: PAnsiChar; Len: Integer): string;
- function GetHzPyFull(HzChar: string): string;
- function GetPyChars(HzChar: string): string;
- function GetHzPyHead(HzChar: PAnsiChar; Len: Integer): String;
- function CheckIsGB2312(AChar: WideChar): Boolean;
- function StrIsCanShow(const WS: WideString): Boolean;
- function HZ2PY(const HZ: string; ISPYHead: Boolean = True): string;
- implementation
- {$i HzSpDat2.inc}
- function GetHzPy(HzChar: PAnsiChar; Len: Integer): string;
- var
- C: Char;
- Index: Integer;
- begin
- Result := '';
- if (Len > 1) and (HzChar[0] >= #129) and (HzChar[1] >= #64) then
- begin
- //是否为 GBK 字符
- case HzChar[0] of
- #163: // 全角 ASCII
- begin
- C := Chr(Ord(HzChar[1]) - 128);
- if C in ['a'..'z', 'A'..'Z', '0'..'9', '(', ')', '[', ']'] then
- Result := C
- else
- Result := '';
- end;
- #162: // 罗马数字
- begin
- if HzChar[1] > #160 then
- Result := CharIndex[Ord(HzChar[1]) - 160]
- else
- Result := '';
- end;
- #166: // 希腊字母
- begin
- if HzChar[1] in [#$A1..#$B8] then
- Result := CharIndex2[Ord(HzChar[1]) - $A0]
- else if HzChar[1] in [#$C1..#$D8] then
- Result := CharIndex2[Ord(HzChar[1]) - $C0]
- else
- Result := '';
- end;
- else
- begin // 获得拼音索引
- Index := PyCodeIndex[Ord(HzChar[0]) - 128, Ord(HzChar[1]) - 63];
- if Index = 0 then
- Result := ''
- else
- Result := PyMusicCode[Index];
- end;
- end;
- end
- else if Len > 0 then
- begin
- //在 GBK 字符集外, 即半角字符
- if HzChar[0] in ['a'..'z', 'A'..'Z', '0'..'9', '(', ')', '[', ']',
- '.', '!', '@', '#', '
- 以上方式均在winxp+lazarus 1.0.14下测试通过
- 感谢欧文
- 测试程序如下:
- , '%', '^', '&', '*', '-', '+',
- '<', '>', '?', ':', '"'{$endif}] then
- Result := AValue[0]
- else
- Result := '';
- end;
- end;
- function getHanyuPinyinFull(AValue: AnsiString): AnsiString;
- var
- i, len: Integer;
- Py: AnsiString;
- function IsLeadCharOfGBK(C: AnsiChar): Boolean;
- begin
- Result := C >= #129;
- end;
- begin
- Result := '';
- i := 1;
- while i <= Length(AValue) do
- begin
- if IsLeadCharOfGBK(AValue[i]) and (Length(AValue) - i > 0) then
- len := 2
- else
- len := 1;
- Py := GetHanyuPinyinByChar(@AValue[i], len);
- Inc(i, len);
- if (Result <> '') and (Py <> '') then
- Result := Result + ' ' + Py
- else
- Result := Result + Py;
- end;
- end;
- function GetFirstLetter(AValue: pAnsiChar; Len: Integer): AnsiString;
- begin
- Result := Copy(GetHanyuPinyinByChar(AValue, Len), 1, 1);
- end;
- function GetHanyuPinyinShort(AValue: AnsiString): AnsiString;
- var
- i, len: Integer;
- Py: AnsiString;
- function IsLeadCharOfGBK(C: AnsiChar): Boolean;
- begin
- Result := C >= #129;
- end;
- begin
- Result := '';
- i := 1;
- while i <= Length(AValue) do
- begin
- if IsLeadCharOfGBK(AValue[i]) and (Length(AValue) - i > 0) then
- len := 2
- else
- len := 1;
- Py := GetFirstLetter(@AValue[i], len);
- Inc(i, len);
- Result := Result + Py;
- end;
- end;
- end.
复制代码 py单元 方式2:- unit py;
- {$mode objfpc}{$H+}
- interface
- uses
- Classes, SysUtils;
- function GetHzPy(HzChar: PAnsiChar; Len: Integer): string;
- function GetHzPyFull(HzChar: string): string;
- function GetPyChars(HzChar: string): string;
- function GetHzPyHead(HzChar: PAnsiChar; Len: Integer): String;
- function CheckIsGB2312(AChar: WideChar): Boolean;
- function StrIsCanShow(const WS: WideString): Boolean;
- function HZ2PY(const HZ: string; ISPYHead: Boolean = True): string;
- implementation
- {$i HzSpDat2.inc}
- function GetHzPy(HzChar: PAnsiChar; Len: Integer): string;
- var
- C: Char;
- Index: Integer;
- begin
- Result := '';
- if (Len > 1) and (HzChar[0] >= #129) and (HzChar[1] >= #64) then
- begin
- //是否为 GBK 字符
- case HzChar[0] of
- #163: // 全角 ASCII
- begin
- C := Chr(Ord(HzChar[1]) - 128);
- if C in ['a'..'z', 'A'..'Z', '0'..'9', '(', ')', '[', ']'] then
- Result := C
- else
- Result := '';
- end;
- #162: // 罗马数字
- begin
- if HzChar[1] > #160 then
- Result := CharIndex[Ord(HzChar[1]) - 160]
- else
- Result := '';
- end;
- #166: // 希腊字母
- begin
- if HzChar[1] in [#$A1..#$B8] then
- Result := CharIndex2[Ord(HzChar[1]) - $A0]
- else if HzChar[1] in [#$C1..#$D8] then
- Result := CharIndex2[Ord(HzChar[1]) - $C0]
- else
- Result := '';
- end;
- else
- begin // 获得拼音索引
- Index := PyCodeIndex[Ord(HzChar[0]) - 128, Ord(HzChar[1]) - 63];
- if Index = 0 then
- Result := ''
- else
- Result := PyMusicCode[Index];
- end;
- end;
- end
- else if Len > 0 then
- begin
- //在 GBK 字符集外, 即半角字符
- if HzChar[0] in ['a'..'z', 'A'..'Z', '0'..'9', '(', ')', '[', ']',
- '.', '!', '@', '#', '
- 以上方式均在winxp+lazarus 1.0.14下测试通过
- 感谢欧文
- 测试程序如下:
- , '%', '^', '&', '*', '-', '+',
- '<', '>', '?', ':', '"'] then
- Result := HzChar[0]
- else
- Result := '';
- end;
- end;
- function GetHzPyFull(HzChar: string): string;
- var
- i, len: Integer;
- Py: String;
- function IsDouByte(C: Char): Boolean;
- begin
- Result := C >= #129;
- end;
- begin
- Result := '';
- i := 1;
- while i <= Length(HzChar) do
- begin
- if IsDouByte(HzChar[i]) and (Length(HzChar) - i > 0) then
- len := 2
- else
- len := 1;
- Py := GetHzPy(@HzChar[i], len);
- Inc(i, len);
- if (Result <> '') and (Py <> '') then
- Result := Result + ' ' + Py
- else
- Result := Result + Py;
- end;
- end;
- function GetPyChars(HzChar: string): string;
- var
- i, len: Integer;
- Py: String;
- function IsDouByte(C: Char): Boolean;
- begin
- Result := C >= #129;
- end;
- begin
- Result := '';
- i := 1;
- while i <= Length(HzChar) do
- begin
- if IsDouByte(HzChar[i]) and (Length(HzChar) - i > 0) then
- len := 2
- else
- len := 1;
- Py := GetHzPyHead(@HzChar[i], len);
- Inc(i, len);
- Result := Result + Py;
- end;
- end;
- function GetHzPyHead(HzChar: PAnsiChar; Len: Integer): String;
- begin
- Result := Copy(GetHzPy(HzChar, Len), 1, 1);
- end;
- function CheckIsGB2312(AChar: WideChar): Boolean;
- var
- S: AnsiString;
- begin
- S := AChar;
- Result := (PByte(integer(S)+1)^>=$A1) and (PByte(integer(S)+1)^<=$FE) and
- (PByte(S)^>=$B0) and (PByte(S)^<=$F7);
- end;
- function StrIsCanShow(const WS: WideString): Boolean;
- var
- i: integer;
- P: PWideChar;
- begin
- Result := True;
- P := Pointer(WS);
- for i:=1 to Length(WS) do begin
- if not (
- ((PWord(P)^>=$20) and (PWord(P)^<=$7E)) //Ansi 可见字符
- or CheckIsGB2312(P^) //GB2312汉字及符号
- ) then begin
- Result := False;
- Break;
- end;
- Inc(P);
- end;
- end;
- function HZ2PY(const HZ: string; ISPYHead: Boolean): string;
- begin
- if ISPYHead then Result := Trim(GetPyChars(Trim(HZ)))
- else Result := Trim(GetHzPyFull(Trim(HZ)));
- if not StrIsCanShow(Result) then Result := '';
- end;
- end.
复制代码 |
|