從化學式中提取數字-包括括號內的多個元素-有解無憂

我在電子表格中有數千個化學式的串列，我想計算每個化學式中每個化學元素出現的次數。這里給出了一些例子：

CH ₃ NO ₃
CSe ₂
C ₂ Cl ₂
C ₂ Cl ₂ O ₂
C ₄ H ₆ COOH
(C ₆ H ₅ ) ₂ P(CH ₂ ) ₆ P(C ₆ H ₅ ) ₂

從化學式中提取數字 - 包括括號內的多個元素

我發現@PEH（從化學式中提取數字 - 包括括號內的多個元素

Private RegEx As RegExp

Function CountElements(ChemFormulaRange As Variant, ElementRange As Variant) As Variant

'define variables
Dim RetValRange() As Long
Dim RetVal As Long
Dim ChemFormula As String
Dim npoints As Long
Dim i As Long
Dim mpoints As Long
Dim j As Long

' Connvert input ranges to variant arrays
If TypeName(ChemFormulaRange) = "Range" Then ChemFormulaRange = ChemFormulaRange.Value
If TypeName(ElementRange) = "Range" Then ElementRange = ElementRange.Value

'parameter
npoints = UBound(ChemFormulaRange, 1) - LBound(ChemFormulaRange, 1)   1
mpoints = UBound(ElementRange, 2) - LBound(ElementRange, 2)   1

'dimension array
ReDim RetValRange(1 To npoints, 1 To mpoints)

If RegEx Is Nothing Then
    Set RegEx = New RegExp
    ' apply the properties
End If

'calculate all values
For j = 1 To mpoints
    Element = ElementRange(1, j)
        For i = 1 To npoints
        RetVal = 0
        ChemFormula = ChemFormulaRange(i, 1)
            Call ChemRegex(ChemFormula, Element, RetVal, RegEx)
        RetValRange(i, j) = RetVal
        Next i
Next j

'output answer
CountElements = RetValRange

End Function
Private Sub ChemRegex(ChemFormula, Element, RetVal, RegEx)
    
'ChemRegex created by PEH (CC BY-SA 4.0) https://stackoverflow.com/a/46091904/17194644
    
    With RegEx
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
    End With
    
    'first pattern matches every element once
    RegEx.Pattern = "([A][cglmrstu]|[B][aehikr]?|[C][adeflmnorsu]?|[D][bsy]|[E][rsu]|[F][elmr]?|[G][ade]|[H][efgos]?|[I][nr]?|[K][r]?|[L][airuv]|[M][cdgnot]|[N][abdehiop]?|[O][gs]?|[P][abdmortu]?|[R][abefghnu]|[S][bcegimnr]?|[T][abcehilms]|[U]|[V]|[W]|[X][e]|[Y][b]?|[Z][nr])([0-9]*)"
    
    Dim Matches As MatchCollection
    Set Matches = RegEx.Execute(ChemFormula)
    
    Dim m As Match
    For Each m In Matches
        If m.SubMatches(0) = Element Then
            RetVal = RetVal   IIf(Not m.SubMatches(1) = vbNullString, m.SubMatches(1), 1)
        End If
    Next m
    
End Sub

uj5u.com熱心網友回復：

看起來您像我上次建議的那樣修改了代碼以重用 RegExp 物件，實際上我希望這能顯著提高性能。但是，我應該更好地解釋如何實作，但請參閱下面的示例。

在示例中，我還采用了您的第二個 RegExp 模式，但重新設計了其余的模式。這個例子對我來說似乎對你的樣本資料有用，但這就是我測驗過的！

Option Explicit    
Private regEx As RegExp
Private regEx2 As RegExp

Sub Test()
' formulas in A2:A7 and elements in B1:H1 (see OP's screenshot), return results in B2:H7
    Range("B2:H7").Value = CountElements(Range("A2:A7").Value, Range("B1:H1"))
End Sub

Function CountElements(ChemFormulaRange As Variant, ElementRange As Variant) As Variant
Dim RetValRange() As Long
Dim RetVal As Long
Dim ChemFormula As String
Dim i As Long, j As Long
Dim mpoints As Long, npoints As Long
Dim Element As String

    If regEx Is Nothing Then
        Set regEx = New RegExp
        With regEx
            .Global = True
            '.MultiLine = True ' ? only if working with multilines
            .IgnoreCase = False
            
            'first pattern matches every element once
            .Pattern = "([A][cglmrstu]|[B][aehikr]?|[C][adeflmnorsu]?|[D][bsy]|[E][rsu]|[F][elmr]?|[G][ade]|[H][efgos]?|[I][nr]?|[K][r]?|[L][airuv]|[M][cdgnot]|[N][abdehiop]?|[O][gs]?|[P][abdmortu]?|[R][abefghnu]|[S][bcegimnr]?|[T][abcehilms]|[U]|[V]|[W]|[X][e]|[Y][b]?|[Z][nr])([0-9]*)"
        End With
        
        Set regEx2 = New RegExp
        With regEx2
            .Global = True
            '.MultiLine = True ?
            .IgnoreCase = False

            'second patternd finds parenthesis and multiplies elements within
            .Pattern = "(\((. ?)\)([0-9]) ) ?"
        End With
    End If


    ' Convert input ranges to variant arrays
    If TypeName(ChemFormulaRange) = "Range" Then ChemFormulaRange = ChemFormulaRange.Value
    If TypeName(ElementRange) = "Range" Then ElementRange = ElementRange.Value

    'parameter
    npoints = UBound(ChemFormulaRange, 1) - LBound(ChemFormulaRange, 1)   1
    mpoints = UBound(ElementRange, 2) - LBound(ElementRange, 2)   1

    'dimension arrays
    ReDim RetValRange(1 To npoints, 1 To mpoints)

    'calculate all values
    For i = 1 To npoints
        ChemFormula = ChemFormulaRange(i, 1)
        For j = 1 To mpoints
            RetVal = 0
            Element = ElementRange(1, j)
            Call ChemRegex(ChemFormula, Element, RetVal)
            RetValRange(i, j) = RetVal
        Next
    Next

    'output answer
    CountElements = RetValRange
    
   ' Set regEx = Nothing: Set regEx2 = Nothing

End Function

Private Sub ChemRegex(ChemFormula, Element, RetVal)
Dim Matches As MatchCollection, Matches2 As MatchCollection
Dim m As Match, m2 As Match
    
    Set Matches = regEx.Execute(ChemFormula)
    For Each m In Matches
        If m.SubMatches(0) = Element Then
            RetVal = RetVal   IIf(Not m.SubMatches(1) = vbNullString, m.SubMatches(1), 1)
        End If
    Next m

    If InStr(1, ChemFormula, "(") Then ' if the formula includes elements within parentheses
        Set Matches2 = regEx2.Execute(ChemFormula)
        For Each m2 In Matches2
            Set Matches = regEx.Execute(m2.Value)
            For Each m In Matches
                If m.SubMatches(0) = Element Then
                    If m.SubMatches(1) = vbNullString Then
                        RetVal = RetVal   m2.SubMatches(2) - 1
                    Else
                        RetVal = RetVal   m.SubMatches(1) * (m2.SubMatches(2) - 1)
                    End If
                End If
            Next
        Next m2
    End If

End Sub

這當然可以通過按照 CDP1802 的建議在一個 RegExp 執行中測驗所有元素來進一步改進，但我會把它留給你！

uj5u.com熱心網友回復：

通過在一次正則運算式執行中提取所有元素而不是一次只提取一個元素，您可能會獲得性能改進。

Option Explicit

Sub Demo()

    Dim lastrow As Long, lastcol As Long
    Dim c As Long, r As Long, d As Object
    Dim f As String, el As String, ar
    Set d = CreateObject("Scripting.Dictionary")
    
    With Sheet1
        lastrow = .Cells(.Rows.Count, "A").End(xlUp).Row
        lastcol = .Cells(1, .Columns.Count).End(xlToLeft).Column
        ar = .Cells(1, 1).Resize(lastrow, lastcol)
        
        For r = 2 To lastrow
            f = ar(r, 1)
            Call parse(d, f)
            For c = 2 To lastcol
               el = ar(1, c)
               If d.exists(el) Then
                   ar(r, c) = d(el)
               End If
            Next
            d.RemoveAll
        Next
        .Cells(1, 1).Resize(lastrow, lastcol) = ar
    End With
    
    MsgBox "Done"
    
End Sub

Sub parse(ByRef dict, s As String)
  
    Dim regex As Object
    Set regex = CreateObject("VBScript.RegExp")
    With regex
        .Global = False
        .Pattern = "[(]([^)] )[)](\d )"
    End With
    
    ' expand bracket into multiple entries
    Dim m, matches, sm, n As Long, el As String
    Do While regex.test(s)
        Set m = regex.Execute(s)
        For n = 1 To m(0).submatches(1)
            s = s & " " & m(0).submatches(0)
        Next
        s = regex.Replace(s, "")
    Loop
    
    ' count elements
    regex.Pattern = "(" & Symbols & ")([0-9]*)"
    regex.Global = True
    If regex.test(s) Then
        Set matches = regex.Execute(s)
        For Each m In matches
            el = m.submatches(0)
            n = Val(m.submatches(1))
            If n = 0 Then n = 1
            dict(el) = dict(el)   n
        Next
    End If
    
End Sub

Function Symbols() As String

   Symbols = "A[cglmrstu]|" & _
        "B[aehikr]?|" & _
        "C[adeflmnorsu]?|" & _
        "D[bsy]|" & _
        "E[rsu]|" & _
        "F[elmr]?|" & _
        "G[ade]|" & _
        "H[efgos]?|" & _
        "I[nr]?|" & _
        "K[r]?|" & _
        "L[airuv]|" & _
        "M[cdgnot]|" & _
        "N[abdehiop]?|" & _
        "O[gs]?|" & _
        "P[abdmortu]?|" & _
        "R[abefghnu]|" & _
        "S[bcegimnr]?|" & _
        "T[abcehilms]|" & _
        "[UVW]|" & _
        "X[e]|" & _
        "Y[b]?|" & _
        "Z[nr]"

End Function

轉載請註明出處，本文鏈接：https://www.uj5u.com/caozuo/380749.html

標籤：数组擅长正则表达式 vba

上一篇：VBA從一張紙復制資料并粘貼到一個范圍內，然后使用LOOP或任何其他方法重復

下一篇：為什么在參考具有單元格值VBA的范圍時出現錯誤“物件的方法范圍_Global失敗”？[復制]