汉字拼音的一个解决方法 - 中国WEB开发者网络 (http://www.webasp.net) -- 技术教程 (http://www.webasp.net/article/) --- 汉字拼音的一个解决方法 (http://www.webasp.net/article/29/28031.htm) |
| -- 作者:未知 -- 发布日期: 2006-12-08 |
Author:水如烟 暂歇一下写那个区划方案。 平常中,经常用到汉字转拼音,比如批量生成姓名->拼音作为登录帐号。 这个方法只是简单的利用汉字拼音库。至于怎么找这个库,网上多有介绍。在最后提供下载的方案中也提供了这个库文本文件。 主要代码如下:
Imports System.IO
Imports System.Text.RegularExpressions

Namespace Businness.PinYin

    ''' <summary>
    ''' Converts Chinese characters to pinyin (and looks characters up by pinyin)
    ''' using a lookup table held in a <c>dsPinYin</c> typed data set.
    ''' </summary>
    Public Class PYService

        Private gDataSet As New dsPinYin

        ''' <summary>
        ''' The character table backing this service.
        ''' </summary>
        Public ReadOnly Property PinYinTable() As dsPinYin.PinYinDataTable
            Get
                Return gDataSet.PinYin
            End Get
        End Property

        Private gTxtFile As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase & "pinyin.txt"
        Private gxmlFile As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase & "pinyin.xml"

        ' "Word" captures the leading run of Chinese characters, "PingYin" the rest of the line.
        Private gRegex As New Regex("(?<Word>^[\u4e00-\u9fa5]+)(?<PingYin>.*)")

        ''' <summary>
        ''' Loads the character table from pinyin.xml in the application base directory.
        ''' Any rows currently in the table are discarded first.
        ''' </summary>
        ''' <exception cref="System.IO.FileNotFoundException">pinyin.xml does not exist.</exception>
        Public Sub Load()
            If Not IO.File.Exists(gxmlFile) Then
                Throw New System.IO.FileNotFoundException(String.Format("文件{0}不存在", gxmlFile), gxmlFile)
            End If
            DataSetInitialize()
            gDataSet.ReadXml(gxmlFile)
        End Sub

        ''' <summary>
        ''' Rebuilds the character table from pinyin.txt in the application base directory.
        ''' </summary>
        ''' <exception cref="System.IO.FileNotFoundException">pinyin.txt does not exist.</exception>
        Public Sub Update()
            If Not IO.File.Exists(gTxtFile) Then
                Throw New System.IO.FileNotFoundException(String.Format("文件{0}不存在", gTxtFile), gTxtFile)
            End If
            UpdateFromTxt(gTxtFile)
        End Sub

        ''' <summary>
        ''' Saves the character table to pinyin.xml in the application base directory.
        ''' </summary>
        Public Sub Save()
            gDataSet.WriteXml(gxmlFile)
        End Sub

        ' Empties the data set before an update or a load.
        Private Sub DataSetInitialize()
            Me.gDataSet.Clear()
            Me.gDataSet.AcceptChanges()
        End Sub

        ' Reads the text file line by line and adds every single-character entry to the table.
        Private Sub UpdateFromTxt(ByVal file As String)
            DataSetInitialize()

            ' Using guarantees the reader is closed even if Add throws.
            Using mReader As New IO.StreamReader(file, System.Text.Encoding.Default)
                ' ReadLine returns Nothing only at end of stream; a blank line is just
                ' skipped by Add and must not terminate the import.
                Dim mLine As String = mReader.ReadLine()
                While mLine IsNot Nothing
                    Add(mLine)
                    mLine = mReader.ReadLine()
                End While
            End Using

            Me.gDataSet.PinYin.AcceptChanges()
        End Sub

        ' Parses one line of the text file; lines that do not start with a Chinese character are ignored.
        Private Sub Add(ByVal line As String)
            If line Is Nothing Then Exit Sub

            Dim mMatch As Match = gRegex.Match(line)
            If Not mMatch.Success Then Exit Sub

            ' Only single characters are stored, not multi-character words.
            Dim mWord As String = mMatch.Groups("Word").Value
            If mWord.Length = 1 Then
                Add(mWord, mMatch.Groups("PingYin").Value)
            End If
        End Sub

        ' Adds the readings in py to the row for word, creating the row if needed.
        ' Readings are stored as a list separated by exactly one space, without duplicates.
        Private Sub Add(ByVal word As String, ByVal py As String)
            Dim mCode As String = ChineseCode(word)
            Dim mRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(mCode)

            Dim mReadings As New System.Collections.Generic.List(Of String)
            If mRow IsNot Nothing Then mReadings.AddRange(SplitReadings(mRow.拼音))
            Dim mKnownCount As Integer = mReadings.Count

            ' Compare whole readings: a substring test would wrongly treat "zha"
            ' as already present when "zhang" is stored.
            For Each s As String In SplitReadings(py)
                If Not mReadings.Contains(s) Then mReadings.Add(s)
            Next

            Dim mJoined As String = String.Join(" ", mReadings.ToArray())
            If mRow Is Nothing Then
                Me.gDataSet.PinYin.AddPinYinRow(word, mCode, mJoined)
            ElseIf mReadings.Count > mKnownCount Then
                mRow.拼音 = mJoined
            End If
        End Sub

        ' Splits a reading list on any run of white space; returns an empty array for Nothing or blank input.
        Private Shared Function SplitReadings(ByVal value As String) As String()
            If String.IsNullOrEmpty(value) Then Return New String() {}
            ' A Nothing separator array makes Split treat every white-space character as a delimiter.
            Return value.Split(CType(Nothing, Char()), StringSplitOptions.RemoveEmptyEntries)
        End Function

        ''' <summary>
        ''' Converts a string to pinyin. Characters that are not Chinese characters are copied unchanged.
        ''' </summary>
        ''' <param name="line">The string to convert. Nothing or an empty string yields an empty string.</param>
        ''' <param name="isgetfirst">For characters with several readings, use only the first reading.</param>
        ''' <returns>The converted string.</returns>
        Public Function ToPinyin(ByVal line As String, ByVal isgetfirst As Boolean) As String
            If String.IsNullOrEmpty(line) Then Return String.Empty

            Dim mBuilder As New System.Text.StringBuilder
            For Each ch As Char In line
                Dim mCurrent As String = ch.ToString()
                If IsTrue(mCurrent) Then
                    mBuilder.Append(GetPinyin(mCurrent, isgetfirst))
                Else
                    mBuilder.Append(ch)
                End If
            Next
            Return mBuilder.ToString()
        End Function

        ' Returns the pinyin for one character, or the character itself when the table has no reading for it.
        Private Function GetPinyin(ByVal word As String, ByVal isgetfirst As Boolean) As String
            Dim mArray As String() = PinYinArray(ChineseCode(word))
            If mArray Is Nothing Then Return word

            If mArray.Length = 1 OrElse isgetfirst Then
                ' Single reading, or the caller asked for the first one only.
                Return mArray(0)
            End If

            ' Several readings: wrap them in parentheses, separated by commas.
            Return String.Format("({0})", String.Join(",", mArray))
        End Function

        ' Returns the readings stored for a character code, or Nothing when there are none.
        Private Function PinYinArray(ByVal code As String) As String()
            Dim mRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(code)
            If mRow Is Nothing Then Return Nothing

            Dim mReadings As String() = SplitReadings(mRow.拼音)
            ' A blank reading list is treated like a missing row so callers never index an empty array.
            If mReadings.Length = 0 Then Return Nothing
            Return mReadings
        End Function

        ''' <summary>
        ''' Finds the characters that have the given pinyin as one of their readings.
        ''' </summary>
        ''' <param name="pinyin">The pinyin to look up; it must equal a whole reading.</param>
        ''' <returns>The matching characters; an empty array when nothing matches or pinyin is Nothing or empty.</returns>
        Public Function WordArray(ByVal pinyin As String) As String()
            Dim mResult As New System.Collections.Generic.List(Of String)
            If String.IsNullOrEmpty(pinyin) Then Return mResult.ToArray()

            ' The rows are scanned directly instead of building a LIKE filter from the
            ' caller's text: quotes or wildcard characters in pinyin would corrupt the
            ' filter expression, and every row needs the exact token check anyway.
            For Each mRow As dsPinYin.PinYinRow In Me.gDataSet.PinYin.Select()
                If mRow.IsNull("拼音") Then Continue For
                If Array.IndexOf(SplitReadings(mRow.拼音), pinyin) <> -1 Then
                    mResult.Add(mRow.汉字)
                End If
            Next
            Return mResult.ToArray()
        End Function

        ''' <summary>
        ''' Finds the characters that have the given pinyin and returns them concatenated into one string.
        ''' </summary>
        ''' <param name="pinyin">The pinyin to look up.</param>
        Public Function Words(ByVal pinyin As String) As String
            Return String.Concat(WordArray(pinyin))
        End Function

        ''' <summary>
        ''' Returns the code of a Chinese character: the first two bytes of its encoding
        ''' in the system default code page, as four upper-case hexadecimal digits.
        ''' </summary>
        ''' <param name="word">A single Chinese character.</param>
        ''' <returns>The code, or Nothing when word is not a single Chinese character.</returns>
        Public Shared Function ChineseCode(ByVal word As String) As String
            If Not IsTrue(word) Then Return Nothing

            ' NOTE(review): the result depends on Encoding.Default. This presumably expects a
            ' double-byte Chinese code page; under any other default encoding the codes will
            ' differ or the second byte may be missing - confirm the deployment environment.
            Dim bytes() As Byte = System.Text.Encoding.Default.GetBytes(word)

            ' "X2" pads each byte to two digits so distinct byte pairs can never produce the same code.
            Return String.Concat(bytes(0).ToString("X2"), bytes(1).ToString("X2"))
        End Function

        ''' <summary>
        ''' Tells whether the argument is exactly one Chinese character (U+4E00 to U+9FA5).
        ''' </summary>
        ''' <param name="word">The text to test.</param>
        Public Shared Function IsTrue(ByVal word As String) As Boolean
            If word Is Nothing Then Return False
            Return System.Text.RegularExpressions.Regex.IsMatch(word, "^[\u4e00-\u9fa5]$")
        End Function

    End Class

End Namespace
效果图: |
| webasp.net |