From 7fefdbe95ab49fd9f9bfb818ae28f7098b209056 Mon Sep 17 00:00:00 2001 From: Rockford Wei Date: Tue, 21 Nov 2017 17:16:56 -0500 Subject: [PATCH] Upgrade to more Swifty API. --- README.md | 42 ++++++++------ README.zh_CN.md | 42 ++++++++------ Sources/Regex/Regex.swift | 96 +++++++++++++++++++++++++++++++ Tests/RegexTests/RegexTests.swift | 46 +++++++++------ test.sh | 21 +++---- 5 files changed, 188 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 76ee585..91b3d39 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,10 @@ Ensure you have installed and activated the latest Swift 4.0 tool chain. Add this project as a dependency in your Package.swift file. ``` swift -.Package(url:"https://github.com/PerfectSideRepos/Perfect-RegEx.git", majorVersion: 3) +.package(url:"https://github.com/PerfectSideRepos/Perfect-RegEx.git", from: "3.1.0") + +... +dependencies: ["RegEx"] ``` Then please add the following line to the beginning part of swift sources: @@ -62,34 +65,41 @@ import Regex ## Quick Start -The following demo shows how to extract substrings with a pattern, for example, phone numbers: +The following demo shows how to extract substring ranges with a pattern: ``` swift -let text = "this is a long test. The correct number is 123-456-7890 or 647-237-8888 but please don't deal because it's not my number. 我的电话我可不告诉你是 416-970-8888 🇨🇳 🇨🇦 " +var source = "there is a bloody bad bread on my bed." + +let ranges = source.match(pattern: "b[a-z]+d") +// it will figure out the range of `blood`, `bad` `bread` and `bed` + +// you can do further operations, such as remove: +source.removeSubrange(ranges[0]) -let found = text.matches() -print(found) +print(source) +// the result should be: +// there is a y bad bread on my bed. 
``` ## API Info -There is only one function in this library, called `matches()`: ``` swift extension String { - public func matches(pattern: String = "[0-9]{3}-[0-9]{3}-[0-9]{4}", limitation: Int = 32) - -> [(rangeBegin: Int, rangeEnd: Int, extraction: String)] + /// test if the string contains certain pattern + /// - parameters: + /// - pattern: string to recognize + /// - return: true for found + public func contains(pattern: String) -> Bool + + /// find string ranges + /// - parameters: + /// - pattern: string to recognize + /// - return: a string range array + public func match(pattern: String) -> [Range<String.Index>] } ``` -### Parameters: -- pattern: String, the regular expression; default value is a typical phone number, like 123-456-7890 -- limitation: Int, the maximum number of matches allowed to find; default is 32, which means the first 32 needles would be found and save from the stack - -### Returns: - -`[(rangeBegin: Int, rangeEnd: Int, extraction: String)]` - a tuple array, each element is the range begin / end mark, with the extraction value; if nothing found or error happened, the result set will be empty. - ## Issues We are transitioning to using JIRA for all bugs and support related issues, therefore the GitHub issues has been disabled. diff --git a/README.zh_CN.md b/README.zh_CN.md index c7e1286..513f67f 100644 --- a/README.zh_CN.md +++ b/README.zh_CN.md @@ -56,8 +56,10 @@ 请在您的Package.swift 文件中增加如下依存关系: ``` swift +.package(url:"https://github.com/PerfectSideRepos/Perfect-RegEx.git", from: "3.1.0") -.Package(url:"https://github.com/PerfectSideRepos/Perfect-RegEx.git", majorVersion: 3) +... +dependencies: ["RegEx"] ``` @@ -71,34 +73,40 @@ import Regex ## 快速上手 -以下示范代码展示了如何从字符串内提取电话号码: +以下示范代码展示了如何从字符串内寻找特定模式的内容,并删除: ``` swift -let text = "this is a long test. The correct number is 123-456-7890 or 647-237-8888 but please don't deal because it's not my number. 我的电话我可不告诉你是 416-970-8888 🇨🇳 🇨🇦 " +var source = "there is a bloody bad bread on my bed." 
-let found = text.matches() -print(found) +let ranges = source.match(pattern: "b[a-z]+d") +// 返回结果将会找到`blood`, `bad` `bread` and `bed` + +// 同时进一步操作,比如删除选项 +source.removeSubrange(ranges[0]) + +print(source) +// 结果字符串变成了 +// there is a y bad bread on my bed. ``` ## API 参考 -本函数库内只有一个函数,名为`matches()`: - ``` swift extension String { - public func matches(pattern: String = "[0-9]{3}-[0-9]{3}-[0-9]{4}", limitation: Int = 32) - -> [(rangeBegin: Int, rangeEnd: Int, extraction: String)] + /// 检查字符串是否包含某种模式 + /// - 参数: + /// - pattern: 待识别内容 + /// - 返回值: 如果找到返回真值 + public func contains(pattern: String) -> Bool + + /// 找到所有符合条件的字串范围信息 + /// - 参数: + /// - pattern: 待识别内容 + /// - 返回值: 范围信息数组 + public func match(pattern: String) -> [Range<String.Index>] } ``` -### 参数: -- pattern: String, 正则表达式,默认为一个电话号码,比如 123-456-7890 -- limitation: Int, 允许检索的最大结果数量限制。默认为32,意味着只有前32个结果能够返回 - -### 返回值: - -`[(rangeBegin: Int, rangeEnd: Int, extraction: String)]` - 一个元组数组,每个元素分别有三个单元,依次为检索结果的起点为止、终点位置,以及随后提取的匹配字符串。如果没找到或者出错,返回值为空数组。 - ### 问题报告、内容贡献和客户支持 我们目前正在过渡到使用JIRA来处理所有源代码资源合并申请、修复漏洞以及其它有关问题。因此,GitHub 的“issues”问题报告功能已经被禁用了。 diff --git a/Sources/Regex/Regex.swift b/Sources/Regex/Regex.swift index 89ecf77..10f4330 100644 --- a/Sources/Regex/Regex.swift +++ b/Sources/Regex/Regex.swift @@ -23,14 +23,110 @@ import SwiftGlibc import Darwin #endif +public class RegEx { + var reg = regex_t() + public init?(_ pattern: String) { + guard 0 == regcomp(&reg, pattern, REG_EXTENDED) else { + return nil + } + } + deinit { + regfree(&reg) + } + + /// test if the current string contains a certain pattern + /// - parameters: + /// - string: string to search + /// - returns: true if found + public func exists( _ string: String) -> Bool { + return match(string).count > 0 + } + + /// using regular expression to extract substrings + /// - parameters: + /// - string: String to search + /// - limitation: Int, the maximum number of matches allowed to find + /// - returns: + /// [Range<String.Index>] - an array, each element is a range of match + public 
func match(_ string: String) -> [Range<String.Index>] { + + // set up an empty result set + var found = [Range<String.Index>]() + + // prepare pointers + guard let me = strdup(string) else { + return found + } + + // string length + let sz = Int(string.count) + let limitation = sz + + // cursor of the string buffer + var cursor = me + + // allocate a buffer for the outcomes + let m = UnsafeMutablePointer<regmatch_t>.allocate(capacity: limitation) + defer { + m.deallocate(capacity: limitation) + free(me) + } + + // loop until all matches were found + while 0 == regexec(&reg, cursor, limitation, m, 0) { + + // retrieve each matches from the pointer buffer + for i in 0 ... limitation - 1 { + + // if reach the end, the position marker will be -1 + let p = m.advanced(by: i).pointee + guard p.rm_so > -1 else { + break + }//end guard + + // append outcomes to return set + let start = String.Index.init(encodedOffset: Int(p.rm_so)) + let end = String.Index.init(encodedOffset: Int(p.rm_eo)) + found.append(start ..< end) + }//next i + + cursor = cursor.advanced(by: Int(m.pointee.rm_eo)) + } + + return found + } +} + extension String { + /// test if the string contains certain pattern + /// - parameters: + /// - pattern: string to recognize + /// - return: true for found + public func contains(pattern: String) -> Bool { + guard let reg = RegEx(pattern) else { + return false + } + return reg.exists(self) + } + + /// find string ranges + /// - parameters: + /// - pattern: string to recognize + /// - return: a string range array + public func match(pattern: String) -> [Range<String.Index>] { + guard let reg = RegEx(pattern) else { + return [] + } + return reg.match(self) + } /// using regular expression to extract substrings /// - parameters: /// - pattern: String, the regular expression; default value is a typical phone number, like 123-456-7890 /// - limitation: Int, the maximum number of matches allowed to find; default is 32, which means the first 32 needles would be found and save from the stack /// - returns: /// [(Int, Int, 
String)] - a turple array, each element is the range begin / end mark, with the extraction value; if nothing found or error happened, the result set will be empty. + @available(*, deprecated) public func matches(pattern: String = "[0-9]{3}-[0-9]{3}-[0-9]{4}", limitation: Int = 32) -> [(rangeBegin: Int, rangeEnd: Int, extraction: String)] { // set up an empty result set diff --git a/Tests/RegexTests/RegexTests.swift b/Tests/RegexTests/RegexTests.swift index be02123..a63c9e8 100644 --- a/Tests/RegexTests/RegexTests.swift +++ b/Tests/RegexTests/RegexTests.swift @@ -21,28 +21,42 @@ import XCTest @testable import Regex class RegexTests: XCTestCase { - func testExample() { - let test = "this is a long test. The correct number is 123-456-7890 or 647-237-8888 but please don't deal because it's not my number. 我的电话我可不告诉你是 416-970-8888" - let outcome = test.matches() - print(outcome) - XCTAssertEqual(outcome.count, 3) - XCTAssertEqual(outcome[0].2, "123-456-7890") - XCTAssertEqual(outcome[1].2, "647-237-8888") - XCTAssertEqual(outcome[2].2, "416-970-8888") - } + static var allTests : [(String, (RegexTests) -> () throws -> Void)] { + return [ + ("testRanges", testRanges), + ("testSlack", testSlack), + ("testExample", testExample) + ] + } + + func testRanges() { + XCTAssertTrue("I have a dream.".contains(pattern: "d(.*)m")) + var source = "there is a bloody bad bread on my bed." + let ranges = source.match(pattern: "b[a-z]+d") + XCTAssertEqual(ranges.count, 4) + print("ranges", ranges) + source.removeSubrange(ranges[0]) + print("remove bloody", source) + } + + @available(*, deprecated) + func testExample() { + let test = "this is a long test. The correct number is 123-456-7890 or 647-237-8888 but please don't deal because it's not my number. 
我的电话我可不告诉你是 416-970-8888" + let outcome = test.matches() + print(outcome) + XCTAssertEqual(outcome.count, 3) + XCTAssertEqual(outcome[0].2, "123-456-7890") + XCTAssertEqual(outcome[1].2, "647-237-8888") + XCTAssertEqual(outcome[2].2, "416-970-8888") + } + + @available(*, deprecated) func testSlack() { let source = "send you <@U4MNSLWMD> :cookie: <@hello> <@myboy123>" let u = source.matches(pattern: "<@[^>]+>") print(u) XCTAssertEqual(u.count, 3) } - - static var allTests : [(String, (RegexTests) -> () throws -> Void)] { - return [ - ("testExample", testExample), - ("testSlack", testSlack) - ] - } } diff --git a/test.sh b/test.sh index 24e39a6..60c365e 100755 --- a/test.sh +++ b/test.sh @@ -1,13 +1,14 @@ +SRCPACK=/tmp/pr.tgz +tar czf $SRCPACK Sources Tests Package.swift +REPO=/tmp/reg.linux +mkdir -p $REPO +pushd . +echo "-------------- LINUX SWIFT 4.0 ----------------" +cd $REPO +tar xzf $SRCPACK +docker pull rockywei/swift:4.0 +docker run -it -v $REPO:/home -w /home rockywei/swift:4.0 /bin/bash -c "swift build -c release && swift test" echo "-------------- OS X / Xcode ----------------" -rm -rf .build -rm -rf Package.pins -rm -rf Package.resolved -swift build +popd swift build -c release swift test -echo "-------------- LINUX SWIFT 4.0 ----------------" -rm -rf .build_linux -rm -rf Package.resolved -docker pull rockywei/swift:4.0 -docker run -it -v $PWD:/home rockywei/swift:4.0 /bin/bash -c "cd /home;swift build --build-path=.build_linux; swift build -c release --build-path=.build_linux;swift test --build-path=.build_linux" -