Rob van der Woude's Scripting Pages
Powered by GeSHi

Source code for checkbom.vbs

(view source code of checkbom.vbs as plain text)

  1. ' Based on information from
  2. ' https://en.wikipedia.org/wiki/Byte_order_mark
  3. '
  4. 'Encoding       Hex BOM
  5. '========       =======
  6. 'BOCU-1         FB EE 28
  7. 'GB-18030       84 31 95 33
  8. 'SCSU           0E FE FF
  9. 'UTF-1          F7 64 4C
  10. 'UTF-7          2B 2F 76 (38|39|2B|2F)
  11. 'UTF-8          EF BB BF
  12. 'UTF-16 (BE)    FE FF
  13. 'UTF-16 (LE)    FF FE
  14. 'UTF-32 (BE)    00 00 FE FF
  15. 'UTF-32 (LE)    FF FE 00 00
  16. 'UTF-EBCDIC     DD 73 66 73
  17.  
  18. Option Explicit
  19.  
  20. Const adTypeBinary = 1
  21. Const adTypeText   = 2
  22.  
  23. Dim i, intRC
  24. Dim dicBOMs, objFSO, objStream
  25. Dim strBOM, strFile, strHead, strType, strUTF7
  26.  
  27. If WScript.Arguments.Unnamed.Count <> 1 Then Syntax
  28. If WScript.Arguments.Named.Count    > 0 Then Syntax
  29.  
  30. intRC   = 0
  31. strFile = WScript.Arguments.Unnamed(0)
  32. strType = "Unknown"
  33. strUTF7 = "38;39;2B;2F" ' Allowed values for 4th byte of UTF-7 BOM
  34.  
  35. Set objFSO = CreateObject( "Scripting.FileSystemObject" )
  36. If Not objFSO.FileExists( strFile ) Then Syntax
  37. Set objFSO = Nothing
  38.  
  39. Set dicBOMs = CreateObject( "Scripting.Dictionary" )
  40. dicBOMs.Add "0000FEFF", "UTF-32 (BE)"
  41. dicBOMs.Add "0EFEFF",   "SCSU"
  42. dicBOMs.Add "2B2F76",   "UTF-7" ' First 3 bytes of BOM only, 4th byte can have several values
  43. dicBOMs.Add "84319533", "GB-18030"
  44. dicBOMs.Add "DD736673", "UTF-EBCDIC"
  45. dicBOMs.Add "EFBBBF",   "UTF-8"
  46. dicBOMs.Add "F7644C",   "UTF-1"
  47. dicBOMs.Add "FBEE28",   "BOCU-1"
  48. dicBOMs.Add "FEFF",     "UTF-16 (BE)"
  49. dicBOMs.Add "FFFE",     "UTF-16 (LE)"
  50. dicBOMs.Add "FFFE0000", "UTF-32 (LE)"
  51.  
  52. On Error Resume Next
  53. Set objStream = CreateObject( "ADODB.Stream" )
  54. objStream.Open
  55. objStream.Type = adTypeBinary
  56. objStream.LoadFromFile strFile
  57. If Err Then intRC = 1
  58. objStream.Position = 0
  59. strHead = ""
  60. For i = 0 To 3
  61.         strHead = strHead & UCase( Right( "0" & Hex( AscB( objStream.Read( 1 ) ) ), 2 ) )
  62.         If Err Then intRC = 1
  63. Next
  64. objStream.Close
  65. Set objStream = Nothing
  66. On Error Goto 0
  67.  
  68. If intRC = 1 Then Syntax
  69.  
  70. For i = 8 To 4 Step -2 ' Try the longest match (4 bytes) first, next try 3 bytes, finally try 2 bytes
  71.         If strType = "Unknown" Then
  72.                 strBOM = Left( strHead, i )
  73.                 If dicBOMs.Exists( strBOM ) Then
  74.                         If dicBOMs( strBOM ) = "UTF-7" Then
  75.                                 If InStr( strUTF7, Right( strHead, 2 ) ) Then strType = "UTF-7"
  76.                         Else
  77.                                 strType = dicBOMs( strBOM )
  78.                         End If
  79.                 End If
  80.         End If
  81. Next
  82.  
  83. If strType = "Unknown" Then intRC = 1
  84.  
  85. WScript.Echo "File Name     : " & strFile & vbcrlf _
  86.            & "First 4 bytes : " & strHead & vbcrlf _
  87.            & "Matching BOM  : " & strBOM  & vbcrlf _
  88.            & "File Encoding : " & strType
  89.  
  90. WScript.Quit intRC
  91.  
  92.  
  93. Sub Syntax
  94.         Dim strMsg
  95.         strMsg = vbCrLf _
  96.                & "CheckBOM.vbs,  Version 1.00" _
  97.                & vbCrLf _
  98.                & "Check a file's Byte Order Mark (BOM) to determine its text encoding" _
  99.                & vbCrLf & vbCrLf _
  100.                & "Usage:  CheckBOM.vbs  textfilename" _
  101.                & vbCrLf & vbCrLf _
  102.                & "Note:   The file encoding is displayed on screen, e.g. ""UTF-7"" or" _
  103.                & vbCrLf _
  104.                & "        ""UTF-32 (LE)"", or ""Unknown"" if not recognized." _
  105.                & vbCrLf _
  106.                & "        Check this script's source code for a list of recognized BOMs." _
  107.                & vbCrLf & vbCrLf _
  108.                & "Written by Rob van der Woude" _
  109.                & vbCrLf _
  110.                & "http://www.robvanderwoude.com"
  111.         WScript.Echo strMsg
  112.         WScript.Quit 1
  113. End Sub
  114.  
  115.  

page last modified: 2024-04-16; loaded in 0.0212 seconds