app中收发消息,有些有@名字后面的nbsp,是来自于web端产生的:
<a ng-click="selectMessageType(0);openChat('user-93dcd6b9-ff71-4cb3-8bf9-3f516f0de1a8')" style="color:#1878c7;font-weight:bold;" class="ng-scope">@Anglia</a>
<span class="ng-scope"> 只是测试at功能</span>
此处iOS的app内收到的是:
"text" : "@Anglia 只是测试at功能",
用TextView显示,对于nbsp则是乱码:
此处的目的是:
对于收到的文本String,检测其中是否包含
nbsp对应的Unicode字符(U+00A0),
如果包含,就直接替换为普通空格
swift unicode nbsp
json – How do I decode HTML entities in swift? – Stack Overflow
此处,得知:
"&nbsp;" : "\u{00a0}"
Unicode Character ‘NO-BREAK SPACE’ (U+00A0)
swift decode html entities
How do I encode HTML entities in swift 2.0? – Stack Overflow
GitHub – TheFlow95/DecodeHTML.swift: A String extension to decode HTML entities to string
结果此处代码:
extension String {
    /// The receiver with HTML markup and character entities decoded, obtained
    /// by importing it as an HTML document through `NSAttributedString`.
    ///
    /// Falls back to the original string when it cannot be encoded as UTF-8
    /// or when the HTML importer throws.
    ///
    /// NOTE: according to the author's follow-up notes, the HTML importer
    /// must be invoked on the main thread; calling it elsewhere crashed
    /// (EXC_BAD_ACCESS) on iOS 8.1.
    var decodedHtml:String {
        var decodedHtmlStr = self

        print("decodedHtmlStr=\(decodedHtmlStr)")
        do {
            if let encodedData = decodedHtmlStr.dataUsingEncoding(NSUTF8StringEncoding) {
                // Tell the importer that the bytes are UTF-8 encoded HTML.
                let attributedOptions : [String: AnyObject] = [
                    NSDocumentTypeDocumentAttribute : NSHTMLTextDocumentType,
                    NSCharacterEncodingDocumentAttribute : NSUTF8StringEncoding
                ]
                print("attributedOptions=\(attributedOptions)") //["DocumentType": NSHTML, "CharacterEncoding": 4]
                let attributedString = try NSAttributedString(data: encodedData, options: attributedOptions, documentAttributes: nil)
                print("attributedString=\(attributedString)")
                /*
                 Sample output (text followed by the default attributes):
                 attributedString=@Anglia again测试{
                 NSColor = "UIDeviceRGBColorSpace 0 0 0 1";
                 NSFont = "<UICTFont: 0x7e01b0a0> font-family: \"Times New Roman\"; font-weight: normal; font-style: normal; font-size: 12.00pt";
                 NSKern = 0;
                 NSParagraphStyle = "Alignment 4, LineSpacing 0, ...";
                 NSStrokeColor = "UIDeviceRGBColorSpace 0 0 0 1";
                 NSStrokeWidth = 0;
                 }
                 */
                // Only the plain text is wanted; the attributes are discarded.
                decodedHtmlStr = attributedString.string
                // Log the decoded plain text (previously this logged the
                // attributed string again under the wrong label).
                print("decodedHtmlStr=\(decodedHtmlStr)")
            }
        } catch {
            // Best effort: keep the original text when the import fails.
            print("decodedHtml error: \(error)")
        }

        return decodedHtmlStr
    }
}
结果还是乱码:
然后才发现:
此处iOS端得到的不是HTML实体 &nbsp;
所以此处就需要:
想办法得知字符串中某个字符的unicode编码
以便于搞清楚到底是输入的是什么字符
搜:
swift get char unicode value
swift – How can I get the Unicode code point(s) of a Character? – Stack Overflow
此处通过:
unicodeScalars
(lldb) po newTextMsg.text.unicodeScalars
▿ StringUnicodeScalarView("@Anglia again测试")
– [0] : "@"
– [1] : "A"
– [2] : "n"
– [3] : "g"
– [4] : "l"
– [5] : "i"
– [6] : "a"
– [7] : "\u{200D}"
– [8] : "\u{00A0}"
– [9] : "a"
– [10] : "g"
– [11] : "a"
– [12] : "i"
– [13] : "n"
– [14] : "\u{6D4B}"
– [15] : "\u{8BD5}"
而得知对应的值是:
\u{200D}
和
\u{00A0}
然后再去找
unicode table
– 零宽连接符 (U+200D) 符号,文字,符号,图标, html: &zwj; – 常用标点 – Unicode®字符百科
– 无中断空格 (U+00A0) 符号,文字,符号,图标, html: &nbsp; – 拉丁文补充1 – Unicode®字符百科
ios unicode not support 200d
iphone – Does IOS support all Unicode emojies? – Stack Overflow
Ios7 displays unicode wrong | Apple Support Communities
ios not support some unicode char
ios UITextView unicode
ios UITextView unicode 00a0
ios – UITextView control line breaks – Stack Overflow
Detect Unicode characters in NSString on iPhone – Stack Overflow
ios7 – UITextField Right Alignment iOS 7 – Stack Overflow
搜:
如何在swift中用代码表示unicode字符串
swift unicode char
The Swift Programming Language (Swift 2.2): Strings and Characters
所以,最终的代码为:
[总结]
extension String {
    /// The receiver with HTML markup and character entities decoded, obtained
    /// by importing it as an HTML document through `NSAttributedString`.
    ///
    /// Falls back to the original string when it cannot be encoded as UTF-8
    /// or when the HTML importer throws.
    ///
    /// NOTE: according to the author's follow-up notes, the HTML importer
    /// must be invoked on the main thread; calling it elsewhere crashed
    /// (EXC_BAD_ACCESS) on iOS 8.1.
    var decodedHtml:String {
        var decodedHtmlStr = self

        print("decodedHtmlStr=\(decodedHtmlStr)")
        do {
            if let encodedData = decodedHtmlStr.dataUsingEncoding(NSUTF8StringEncoding) {
                // Tell the importer that the bytes are UTF-8 encoded HTML.
                let attributedOptions : [String: AnyObject] = [
                    NSDocumentTypeDocumentAttribute : NSHTMLTextDocumentType,
                    NSCharacterEncodingDocumentAttribute : NSUTF8StringEncoding
                ]
                print("attributedOptions=\(attributedOptions)") //["DocumentType": NSHTML, "CharacterEncoding": 4]
                let attributedString = try NSAttributedString(data: encodedData, options: attributedOptions, documentAttributes: nil)
                print("attributedString=\(attributedString)")
                /*
                 Sample output (text followed by the default attributes):
                 attributedString=@Anglia again测试{
                 NSColor = "UIDeviceRGBColorSpace 0 0 0 1";
                 NSFont = "<UICTFont: 0x7e01b0a0> font-family: \"Times New Roman\"; font-weight: normal; font-style: normal; font-size: 12.00pt";
                 NSKern = 0;
                 NSParagraphStyle = "Alignment 4, LineSpacing 0, ...";
                 NSStrokeColor = "UIDeviceRGBColorSpace 0 0 0 1";
                 NSStrokeWidth = 0;
                 }
                 */
                // Only the plain text is wanted; the attributes are discarded.
                decodedHtmlStr = attributedString.string
                // Log the decoded plain text (previously this logged the
                // attributed string again under the wrong label).
                print("decodedHtmlStr=\(decodedHtmlStr)")
            }
        } catch {
            // Best effort: keep the original text when the import fails.
            print("decodedHtml error: \(error)")
        }

        return decodedHtmlStr
    }
}
// Call site: take the "text" field of the incoming message and strip the
// characters the text view cannot render before storing it on the message.
// NOTE(review): `newMessageDict[...].string` looks like a SwiftyJSON-style
// optional accessor — confirm against the surrounding code.
if let text = newMessageDict["text"].string {
    // Earlier attempt, kept for reference: full HTML decoding.
    //let decodedHtmlStr = text.decodedHtml
    //newTextMsg.text = decodedHtmlStr
    newTextMsg.text = filterUnsupportChar(text)
}
/// Returns `originStr` with the zero-width joiner (U+200D) removed.
///
/// Per the original author's note, UITextView did not render U+200D
/// correctly, whereas U+00A0 (no-break space) appeared to display fine and is
/// therefore left untouched.
///
/// - Parameter originStr: The raw message text.
/// - Returns: The text without any U+200D characters.
func filterUnsupportChar(originStr:String) ->String {
    // `replace(_:to:)` is a String helper declared elsewhere in the project.
    let withoutJoiner = originStr.replace("\u{200d}", to: "")
    print("removed 200d: filtedStr=\(withoutJoiner)")

    // Disabled on purpose: replacing U+00A0 turned out to be unnecessary.
    // let withSpaces = withoutJoiner.replace("\u{00a0}", to: " ")
    // print("replaced 00a0: filtedStr=\(withSpaces)")

    return withoutJoiner
}
输出为:
(lldb) po filtedStr.unicodeScalars
▿ StringUnicodeScalarView("@Anglia test")
– [0] : "@"
– [1] : "A"
– [2] : "n"
– [3] : "g"
– [4] : "l"
– [5] : "i"
– [6] : "a"
– [7] : "\u{200D}"
– [8] : "\u{00A0}"
– [9] : "t"
– [10] : "e"
– [11] : "s"
– [12] : "t"
(lldb) po filtedStr
"@Anglia test"
removed 200d: filtedStr=@Anglia test
replaced 00a0: filtedStr=@Anglia test
[后记 20160422]
后期又出现:
&zwj;&nbsp;
好像对于上述版本还是没有解码。
但是更严重的问题是:
必须要在主线程中运行此函数,所以还是很麻烦
详见:
[已解决]iOS 8.1中出错:NSAttributedString初始化出错:EXC_BAD_ACCESS code=1 address 0xbbadbeef
swift html entity decode
json – How do I decode HTML entities in swift? – Stack Overflow
-》Decoding HTML Entities in Swift
后来看到:
Decoding HTML Entities in Swift
以及:
StringExtensionHTML on CocoaPods.org
-》adela-chang/StringExtensionHTML
-》StringExtensionHTML/StringExtensionHTML.swift at master · adela-chang/StringExtensionHTML
直接利用后者即可。
期间遇到:
[已解决]swift中代码出错:Value of type String has no member extend
[已解决]swift代码出错:distance is unavailable call the distanceTo(end) method on the index
最后用代码:
// Mapping from XML/HTML character entity reference (including the leading
// '&' and the trailing ';') to the character it stands for.
// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
//
// The keys must be the literal entity text such as "&nbsp;"; they are looked
// up with the exact "&...;" substring found in the input.
let HtmlCharacterEntitiesDict : [String: Character] = [
    // XML predefined entities:
    "&quot;"     : "\"",
    "&amp;"      : "&",
    "&apos;"     : "'",
    "&lt;"       : "<",
    "&gt;"       : ">",

    // HTML character entity references:
    "&nbsp;"     : "\u{00A0}",
    "&iexcl;"    : "\u{00A1}",
    "&cent;"     : "\u{00A2}",
    "&pound;"    : "\u{00A3}",
    "&curren;"   : "\u{00A4}",
    "&yen;"      : "\u{00A5}",
    "&brvbar;"   : "\u{00A6}",
    "&sect;"     : "\u{00A7}",
    "&uml;"      : "\u{00A8}",
    "&copy;"     : "\u{00A9}",
    "&ordf;"     : "\u{00AA}",
    "&laquo;"    : "\u{00AB}",
    "&not;"      : "\u{00AC}",
    "&shy;"      : "\u{00AD}",
    "&reg;"      : "\u{00AE}",
    "&macr;"     : "\u{00AF}",
    "&deg;"      : "\u{00B0}",
    "&plusmn;"   : "\u{00B1}",
    "&sup2;"     : "\u{00B2}",
    "&sup3;"     : "\u{00B3}",
    "&acute;"    : "\u{00B4}",
    "&micro;"    : "\u{00B5}",
    "&para;"     : "\u{00B6}",
    "&middot;"   : "\u{00B7}",
    "&cedil;"    : "\u{00B8}",
    "&sup1;"     : "\u{00B9}",
    "&ordm;"     : "\u{00BA}",
    "&raquo;"    : "\u{00BB}",
    "&frac14;"   : "\u{00BC}",
    "&frac12;"   : "\u{00BD}",
    "&frac34;"   : "\u{00BE}",
    "&iquest;"   : "\u{00BF}",
    "&Agrave;"   : "\u{00C0}",
    "&Aacute;"   : "\u{00C1}",
    "&Acirc;"    : "\u{00C2}",
    "&Atilde;"   : "\u{00C3}",
    "&Auml;"     : "\u{00C4}",
    "&Aring;"    : "\u{00C5}",
    "&AElig;"    : "\u{00C6}",
    "&Ccedil;"   : "\u{00C7}",
    "&Egrave;"   : "\u{00C8}",
    "&Eacute;"   : "\u{00C9}",
    "&Ecirc;"    : "\u{00CA}",
    "&Euml;"     : "\u{00CB}",
    "&Igrave;"   : "\u{00CC}",
    "&Iacute;"   : "\u{00CD}",
    "&Icirc;"    : "\u{00CE}",
    "&Iuml;"     : "\u{00CF}",
    "&ETH;"      : "\u{00D0}",
    "&Ntilde;"   : "\u{00D1}",
    "&Ograve;"   : "\u{00D2}",
    "&Oacute;"   : "\u{00D3}",
    "&Ocirc;"    : "\u{00D4}",
    "&Otilde;"   : "\u{00D5}",
    "&Ouml;"     : "\u{00D6}",
    "&times;"    : "\u{00D7}",
    "&Oslash;"   : "\u{00D8}",
    "&Ugrave;"   : "\u{00D9}",
    "&Uacute;"   : "\u{00DA}",
    "&Ucirc;"    : "\u{00DB}",
    "&Uuml;"     : "\u{00DC}",
    "&Yacute;"   : "\u{00DD}",
    "&THORN;"    : "\u{00DE}",
    "&szlig;"    : "\u{00DF}",
    "&agrave;"   : "\u{00E0}",
    "&aacute;"   : "\u{00E1}",
    "&acirc;"    : "\u{00E2}",
    "&atilde;"   : "\u{00E3}",
    "&auml;"     : "\u{00E4}",
    "&aring;"    : "\u{00E5}",
    "&aelig;"    : "\u{00E6}",
    "&ccedil;"   : "\u{00E7}",
    "&egrave;"   : "\u{00E8}",
    "&eacute;"   : "\u{00E9}",
    "&ecirc;"    : "\u{00EA}",
    "&euml;"     : "\u{00EB}",
    "&igrave;"   : "\u{00EC}",
    "&iacute;"   : "\u{00ED}",
    "&icirc;"    : "\u{00EE}",
    "&iuml;"     : "\u{00EF}",
    "&eth;"      : "\u{00F0}",
    "&ntilde;"   : "\u{00F1}",
    "&ograve;"   : "\u{00F2}",
    "&oacute;"   : "\u{00F3}",
    "&ocirc;"    : "\u{00F4}",
    "&otilde;"   : "\u{00F5}",
    "&ouml;"     : "\u{00F6}",
    "&divide;"   : "\u{00F7}",
    "&oslash;"   : "\u{00F8}",
    "&ugrave;"   : "\u{00F9}",
    "&uacute;"   : "\u{00FA}",
    "&ucirc;"    : "\u{00FB}",
    "&uuml;"     : "\u{00FC}",
    "&yacute;"   : "\u{00FD}",
    "&thorn;"    : "\u{00FE}",
    "&yuml;"     : "\u{00FF}",
    "&OElig;"    : "\u{0152}",
    "&oelig;"    : "\u{0153}",
    "&Scaron;"   : "\u{0160}",
    "&scaron;"   : "\u{0161}",
    "&Yuml;"     : "\u{0178}",
    "&fnof;"     : "\u{0192}",
    "&circ;"     : "\u{02C6}",
    "&tilde;"    : "\u{02DC}",
    "&Alpha;"    : "\u{0391}",
    "&Beta;"     : "\u{0392}",
    "&Gamma;"    : "\u{0393}",
    "&Delta;"    : "\u{0394}",
    "&Epsilon;"  : "\u{0395}",
    "&Zeta;"     : "\u{0396}",
    "&Eta;"      : "\u{0397}",
    "&Theta;"    : "\u{0398}",
    "&Iota;"     : "\u{0399}",
    "&Kappa;"    : "\u{039A}",
    "&Lambda;"   : "\u{039B}",
    "&Mu;"       : "\u{039C}",
    "&Nu;"       : "\u{039D}",
    "&Xi;"       : "\u{039E}",
    "&Omicron;"  : "\u{039F}",
    "&Pi;"       : "\u{03A0}",
    "&Rho;"      : "\u{03A1}",
    "&Sigma;"    : "\u{03A3}",
    "&Tau;"      : "\u{03A4}",
    "&Upsilon;"  : "\u{03A5}",
    "&Phi;"      : "\u{03A6}",
    "&Chi;"      : "\u{03A7}",
    "&Psi;"      : "\u{03A8}",
    "&Omega;"    : "\u{03A9}",
    "&alpha;"    : "\u{03B1}",
    "&beta;"     : "\u{03B2}",
    "&gamma;"    : "\u{03B3}",
    "&delta;"    : "\u{03B4}",
    "&epsilon;"  : "\u{03B5}",
    "&zeta;"     : "\u{03B6}",
    "&eta;"      : "\u{03B7}",
    "&theta;"    : "\u{03B8}",
    "&iota;"     : "\u{03B9}",
    "&kappa;"    : "\u{03BA}",
    "&lambda;"   : "\u{03BB}",
    "&mu;"       : "\u{03BC}",
    "&nu;"       : "\u{03BD}",
    "&xi;"       : "\u{03BE}",
    "&omicron;"  : "\u{03BF}",
    "&pi;"       : "\u{03C0}",
    "&rho;"      : "\u{03C1}",
    "&sigmaf;"   : "\u{03C2}",
    "&sigma;"    : "\u{03C3}",
    "&tau;"      : "\u{03C4}",
    "&upsilon;"  : "\u{03C5}",
    "&phi;"      : "\u{03C6}",
    "&chi;"      : "\u{03C7}",
    "&psi;"      : "\u{03C8}",
    "&omega;"    : "\u{03C9}",
    "&thetasym;" : "\u{03D1}",
    "&upsih;"    : "\u{03D2}",
    "&piv;"      : "\u{03D6}",
    "&ensp;"     : "\u{2002}",
    "&emsp;"     : "\u{2003}",
    "&thinsp;"   : "\u{2009}",
    "&zwnj;"     : "\u{200C}",
    "&zwj;"      : "\u{200D}",
    "&lrm;"      : "\u{200E}",
    "&rlm;"      : "\u{200F}",
    "&ndash;"    : "\u{2013}",
    "&mdash;"    : "\u{2014}",
    "&lsquo;"    : "\u{2018}",
    "&rsquo;"    : "\u{2019}",
    "&sbquo;"    : "\u{201A}",
    "&ldquo;"    : "\u{201C}",
    "&rdquo;"    : "\u{201D}",
    "&bdquo;"    : "\u{201E}",
    "&dagger;"   : "\u{2020}",
    "&Dagger;"   : "\u{2021}",
    "&bull;"     : "\u{2022}",
    "&hellip;"   : "\u{2026}",
    "&permil;"   : "\u{2030}",
    "&prime;"    : "\u{2032}",
    "&Prime;"    : "\u{2033}",
    "&lsaquo;"   : "\u{2039}",
    "&rsaquo;"   : "\u{203A}",
    "&oline;"    : "\u{203E}",
    "&frasl;"    : "\u{2044}",
    "&euro;"     : "\u{20AC}",
    "&image;"    : "\u{2111}",
    "&weierp;"   : "\u{2118}",
    "&real;"     : "\u{211C}",
    "&trade;"    : "\u{2122}",
    "&alefsym;"  : "\u{2135}",
    "&larr;"     : "\u{2190}",
    "&uarr;"     : "\u{2191}",
    "&rarr;"     : "\u{2192}",
    "&darr;"     : "\u{2193}",
    "&harr;"     : "\u{2194}",
    "&crarr;"    : "\u{21B5}",
    "&lArr;"     : "\u{21D0}",
    "&uArr;"     : "\u{21D1}",
    "&rArr;"     : "\u{21D2}",
    "&dArr;"     : "\u{21D3}",
    "&hArr;"     : "\u{21D4}",
    "&forall;"   : "\u{2200}",
    "&part;"     : "\u{2202}",
    "&exist;"    : "\u{2203}",
    "&empty;"    : "\u{2205}",
    "&nabla;"    : "\u{2207}",
    "&isin;"     : "\u{2208}",
    "&notin;"    : "\u{2209}",
    "&ni;"       : "\u{220B}",
    "&prod;"     : "\u{220F}",
    "&sum;"      : "\u{2211}",
    "&minus;"    : "\u{2212}",
    "&lowast;"   : "\u{2217}",
    "&radic;"    : "\u{221A}",
    "&prop;"     : "\u{221D}",
    "&infin;"    : "\u{221E}",
    "&ang;"      : "\u{2220}",
    "&and;"      : "\u{2227}",
    "&or;"       : "\u{2228}",
    "&cap;"      : "\u{2229}",
    "&cup;"      : "\u{222A}",
    "&int;"      : "\u{222B}",
    "&there4;"   : "\u{2234}",
    "&sim;"      : "\u{223C}",
    "&cong;"     : "\u{2245}",
    "&asymp;"    : "\u{2248}",
    "&ne;"       : "\u{2260}",
    "&equiv;"    : "\u{2261}",
    "&le;"       : "\u{2264}",
    "&ge;"       : "\u{2265}",
    "&sub;"      : "\u{2282}",
    "&sup;"      : "\u{2283}",
    "&nsub;"     : "\u{2284}",
    "&sube;"     : "\u{2286}",
    "&supe;"     : "\u{2287}",
    "&oplus;"    : "\u{2295}",
    "&otimes;"   : "\u{2297}",
    "&perp;"     : "\u{22A5}",
    "&sdot;"     : "\u{22C5}",
    "&lceil;"    : "\u{2308}",
    "&rceil;"    : "\u{2309}",
    "&lfloor;"   : "\u{230A}",
    "&rfloor;"   : "\u{230B}",
    "&lang;"     : "\u{2329}",
    "&rang;"     : "\u{232A}",
    "&loz;"      : "\u{25CA}",
    "&spades;"   : "\u{2660}",
    "&clubs;"    : "\u{2663}",
    "&hearts;"   : "\u{2665}",
    "&diams;"    : "\u{2666}",
]
extension String {
    // Method 1 (disabled): NSAttributedString + NSHTMLTextDocumentType.
    // pros: can filter out all html tags ?
    // cons:
    //   1. must run in main thread, otherwise it crashes
    //   2. will remove html tags ?
    //
    //    var decodedHtml:String {
    //        var decodedHtmlStr = self
    //
    //        //print("decodedHtmlStr=\(decodedHtmlStr)")
    //        do {
    //            if let encodedData = decodedHtmlStr.dataUsingEncoding(NSUTF8StringEncoding) {
    //                let attributedOptions : [String: AnyObject] = [
    //                    NSDocumentTypeDocumentAttribute : NSHTMLTextDocumentType,
    //                    NSCharacterEncodingDocumentAttribute : NSUTF8StringEncoding
    //                ]
    //                //print("attributedOptions=\(attributedOptions)") //["DocumentType": NSHTML, "CharacterEncoding": 4]
    //                let attributedString = try NSAttributedString(data: encodedData, options: attributedOptions, documentAttributes: nil)
    //                //print("attributedString=\(attributedString)")
    //                decodedHtmlStr = attributedString.string
    //                print("decodedHtmlStr=\(decodedHtmlStr)")
    //            }
    //        } catch {
    //            print("decodedHtml error: \(error)")
    //        }
    //
    //        return decodedHtmlStr
    //    }

    // Method 2 (active): table-driven character entity decoding.
    // pros: only decodes html entities, does not strip html tags
    // cons: (none recorded)

    /// Returns a new string made by removing from the `String`
    /// anything enclosed in HTML brackets <>
    public var strippedHtmlTags: String {
        return stringByReplacingOccurrencesOfString("<[^>]+>", withString: "", options: .RegularExpressionSearch, range: nil)
    }

    /// Returns a new string made by replacing in the `String`
    /// all HTML character entity references with the corresponding
    /// character.
    public var decodedHtmlEntities: String {
        return decodeHTMLEntities().decodedString
    }

    /// Returns a tuple containing the string made by replacing in the
    /// `String` all HTML character entity references with the corresponding
    /// character. Also returned is an array of offset information describing
    /// the location and length offsets for each replacement. This allows
    /// callers to correctly adjust any attributes that may be associated
    /// with substrings within the `String`.
    ///
    /// Text that looks like an entity but cannot be decoded (unknown name,
    /// malformed or out-of-range number) is copied to the output verbatim.
    func decodeHTMLEntities() -> (decodedString: String, replacementOffsets: [(index: String.Index, offset: String.Index.Distance)]) {

        // ===== Utility functions =====

        // Record the index offsets of each replacement
        // This allows anyone to correctly adjust any attributes that may be
        // associated with substrings within the string
        var replacementOffsets: [(index: String.Index, offset: String.Index.Distance)] = []

        // Convert the number in the string to the corresponding
        // Unicode character, e.g.
        //    decodeNumeric("64", 10)   --> "@"
        //    decodeNumeric("20ac", 16) --> "€"
        // A trailing ';' (left on by `decode`) is ignored. Returns `nil` when
        // the text is not a number in the given base or does not denote a
        // valid Unicode scalar value.
        func decodeNumeric(string : String, base : Int32) -> Character? {
            let digits = string.hasSuffix(";") ? String(string.characters.dropLast()) : string
            // Failable parse: rejects empty/non-numeric text and values that
            // do not fit in 32 bits instead of trapping on conversion.
            guard let code = UInt32(digits, radix: Int(base)) else {
                return nil
            }
            // UnicodeScalar(_: UInt32) traps on surrogates and on values
            // beyond U+10FFFF, so filter those out first.
            let isSurrogate = code >= 0xD800 && code <= 0xDFFF
            guard code <= 0x10FFFF && !isSurrogate else {
                return nil
            }
            return Character(UnicodeScalar(code))
        }

        // Decode the HTML character entity to the corresponding
        // Unicode character, return `nil` for invalid input.
        //     decode("&#64;")    --> "@"
        //     decode("&#x20ac;") --> "€"
        //     decode("&lt;")     --> "<"
        //     decode("&foo;")    --> nil
        func decode(entity : String) -> Character? {
            if entity.hasPrefix("&#x") || entity.hasPrefix("&#X") {
                return decodeNumeric(entity.substringFromIndex(entity.startIndex.advancedBy(3)), base: 16)
            } else if entity.hasPrefix("&#") {
                return decodeNumeric(entity.substringFromIndex(entity.startIndex.advancedBy(2)), base: 10)
            } else {
                return HtmlCharacterEntitiesDict[entity]
            }
        }

        // ===== Method starts here =====

        var result = ""
        var position = startIndex

        // Find the next '&' and copy the characters preceding it to `result`:
        while let ampRange = self.rangeOfString("&", range: position ..< endIndex) {
            result.appendContentsOf(self[position ..< ampRange.startIndex])
            position = ampRange.startIndex

            // Find the next ';'. Without one there can be no further entity.
            guard let semiRange = self.rangeOfString(";", range: position ..< endIndex) else {
                break
            }

            // If another '&' occurs before that ';', the current '&' is a
            // literal ampersand (e.g. "Tom & Jerry &lt;3"): copy the text up
            // to the next '&' verbatim and retry from there, so the real
            // entity that follows is still decoded.
            if let nextAmpRange = self.rangeOfString("&", range: ampRange.endIndex ..< semiRange.startIndex) {
                result.appendContentsOf(self[position ..< nextAmpRange.startIndex])
                position = nextAmpRange.startIndex
                continue
            }

            // Everything from '&' to ';' (inclusive) is the candidate entity.
            let entity = self[position ..< semiRange.endIndex]
            if let decoded = decode(entity) {
                // Replace by decoded character:
                result.append(decoded)

                // Record offset: one character replaces the whole entity.
                let offset = (index: semiRange.endIndex, offset: 1 - position.distanceTo(semiRange.endIndex))
                replacementOffsets.append(offset)
            } else {
                // Invalid entity, copy verbatim:
                result.appendContentsOf(entity)
            }
            position = semiRange.endIndex
        }

        // Copy remaining characters to `result`:
        result.appendContentsOf(self[position ..< endIndex])

        // Return results
        return (decodedString: result, replacementOffsets: replacementOffsets)
    }
}
即可实现html的entity的解码:
效果:
转载请注明:在路上 » [已解决]swift去除nbsp的乱码