janx · danhper · Aug 3, 2013
diff --git a/lib/CodingStateMachine.rb b/lib/CodingStateMachine.rb
@@ -14,12 +14,12 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -30,8 +30,11 @@
 
 module  UniversalDetector
     class CodingStateMachine
+        attr_accessor :active
+
         def initialize(sm)
             @_mModel = sm
+            @active = false
             @_mCurrentBytePos = 0
             @_mCurrentCharLen = 0
             reset()
@@ -45,7 +48,7 @@ def next_state(c)
             # for each byte we get its class
             # if it is first byte, we also get byte length
             byteCls = @_mModel['classTable'][c]
-            
+
             if @_mCurrentState == :Start
                 @_mCurrentBytePos = 0
                 @_mCurrentCharLen = @_mModel['charLenTable'][byteCls]
@@ -58,7 +61,7 @@ def next_state(c)
                 v = stateValue[@_mCurrentState]
             end
             @_mCurrentState = @_mModel['stateTable'][v * @_mModel['classFactor'] + byteCls]
-            
+
             @_mCurrentBytePos += 1
             return @_mCurrentState
         end
@@ -71,4 +74,4 @@ def get_coding_state_machine
             return @_mModel['name']
         end
     end
-end
+end
diff --git a/lib/EscCharSetProber.rb b/lib/EscCharSetProber.rb
@@ -14,12 +14,12 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -36,18 +36,18 @@ class EscCharSetProber < CharSetProber
         def initialize
             super
             @_mCodingSM = [ \
-                CodingStateMachine(HZSMModel),
-                CodingStateMachine(ISO2022CNSMModel),
-                CodingStateMachine(ISO2022JPSMModel),
-                CodingStateMachine(ISO2022KRSMModel)
+                CodingStateMachine.new(HZSMModel),
+                CodingStateMachine.new(ISO2022CNSMModel),
+                CodingStateMachine.new(ISO2022JPSMModel),
+                CodingStateMachine.new(ISO2022KRSMModel)
                 ]
             reset()
         end
 
         def reset
             super
             for codingSM in @_mCodingSM
-                unless codingSM then continue end
+                next if codingSM.nil?
                 codingSM.active = true
                 codingSM.reset()
             end
@@ -68,10 +68,10 @@ def get_confidence
         end
 
         def feed(aBuf)
-            for c in aBuf
+            aBuf.each_byte do |c|
                 for codingSM in @_mCodingSM
-                    unless codingSM then continue end
-                    unless codingSM.active then continue end
+                    next if codingSM.nil?
+                    next unless codingSM.active
                     codingState = codingSM.next_state(c)
                     if codingState == :Error
                         codingSM.active = false

diff --git a/lib/UniversalDetector.rb b/lib/UniversalDetector.rb
@@ -14,12 +14,12 @@
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -48,38 +48,38 @@ def chardet(data)
             u.result
         end
     end
-    
+
     DEBUG = nil
-  
+
     Detectiong = 0
     FoundIt = 1
     NotMe = 2
 
     Start = 0
     Error = 1
     ItsMe = 2
-  
+
     MINIMUM_THRESHOLD = 0.20
     PureAscii = 0
     EscAscii = 1
     Highbyte = 2
 
     SHORTCUT_THRESHOLD = 0.95
-  
+
     class Detector
-        
+
         include Singleton
-        
+
         attr_reader :result
-    
+
         def initialize
             @_highBitDetector = /[\x80-\xFF]/n
             @_escDetector = /\033|~\{/n
             @_mEscCharSetProber = nil
             @_mCharSetProbers = []
             reset
         end
-    
+
         def reset
             @result = {"encoding"=> nil, "confidence"=> 0.0}
             @done = false
@@ -93,11 +93,11 @@ def reset
             for prober in @_mCharSetProbers
                 prober.reset
             end
-        end    
-    
+        end
+
         def feed(data)
             if @done || data.empty?
-                return 
+                return
             end
             unless  @_mGotData
                 # If the data starts with BOM, we know it is UTF
@@ -107,7 +107,7 @@ def feed(data)
                 elsif data[0,4] == "\xFF\xFE\x00\x00"
                     # FF FE 00 00  UTF-32, little-endian BOM
                     @result = {"encoding"=> "UTF-32LE", "confidence"=> 1.0}
-                elsif data[0,4] == "\x00\x00\xFE\xFF" 
+                elsif data[0,4] == "\x00\x00\xFE\xFF"
                     # 00 00 FE FF  UTF-32, big-endian BOM
                     @result = {"encoding"=> "UTF-32BE", "confidence"=> 1.0}
                 elsif data[0,4] == "\xFE\xFF\x00\x00"
@@ -121,37 +121,37 @@ def feed(data)
                     @result = {"encoding"=> "UTF-16LE", "confidence"=> 1.0}
                 elsif data[0,2] == "\xFE\xFF"
                     # FE FF  UTF-16, big endian BOM
-                    @result = {"encoding"=> "UTF-16BE", "confidence"=> 1.0}          
+                    @result = {"encoding"=> "UTF-16BE", "confidence"=> 1.0}
                 end
             end
             @_mGotData = true
             if @result["encoding"] && @result["confidence"] > 0.0
                 @done = true
                 return
-            end            
-            
+            end
+
             if @_mInputState == :PureAscii
                 if data =~ @_highBitDetector
                     @_mInputState = :Highbyte
                 elsif (@_mLastChar + data) =~ @_escDetector
                     @_mInputState = :EscAscii
                 end
-            end                        
-            
+            end
+
             @_mLastChar = data[-1]
             if @_mInputState == :EscAscii
                 unless @_mEscCharSetProber
                     @_mEscCharSetProber = EscCharSetProber.new
                 end
-                if @_mEscCharSetProber.feed(data) == constants.eFoundIt
+                if @_mEscCharSetProber.feed(data) == :FoundIt
                     @result = {"encoding"=> @_mEscCharSetProber.get_charset_name() ,"confidence"=> @_mEscCharSetProber.get_confidence()}
-                    @done = true          
-                end  
+                    @done = true
+                end
             elsif @_mInputState == :Highbyte
                 if @_mCharSetProbers.empty?
                     @_mCharSetProbers = MBCSGroupProber.new.mProbers + SBCSGroupProber.new.mProbers + [Latin1Prober.new]
-                end                                
-                @_mCharSetProbers.each do |prober|                    
+                end
+                @_mCharSetProbers.each do |prober|
                     if prober.feed(data) == :FoundIt
                         @result = {"encoding"=> prober.get_charset_name(), "confidence"=> prober.get_confidence()}
                         @done = true
@@ -160,7 +160,7 @@ def feed(data)
                 end #for
             end
         end #feed
-  
+
         def close
             if @done then return end
             unless @_mGotData
@@ -170,7 +170,7 @@ def close
                 return
             end
             @done = true
-            
+
             if @_mInputState == :PureAscii
                 @result = {"encoding" =>  "ascii", "confidence" => 1.0}
                 return @result
@@ -194,7 +194,7 @@ def close
                     return @result
                 end
             end #if
-            
+
             if DEBUG
                 p("no probers hit minimum threshhold\n")
                 for prober in @_mCharSetProbers
@@ -203,8 +203,8 @@ def close
                                      [prober.get_charset_name(), \
                                       prober.get_confidence()])
                 end
-            end            
+            end
         end #close
     end #class
-                       
+
 end #module