Merge branch 'phoenix' of github.com:InvictusInnovations/fc into phoenix

2014-01-14 03:51:20 -05:00 · 2014-01-14 03:51:20 -05:00 · d4e5d604a5
commit d4e5d604a5
parent c0727f30b0 90deb63fcd
19 changed files with 2910 additions and 18 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -11,6 +11,10 @@ INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR} )
 INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/include )
 INCLUDE_DIRECTORIES( ${CMAKE_CURRENT_SOURCE_DIR}/vendor/cyoencode-1.0.2/src )

+# Add GitVersionGen/ to the module search path so that include() can find the
+# GetGitRevisionDescription module (presumably shipped in that directory).
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/GitVersionGen/")
+include(GetGitRevisionDescription)
+# Fills GIT_REFSPEC and GIT_SHA3. GIT_SHA3 is later substituted into
+# GitSHA3.cpp.in by configure_file(); presumably it holds the hash of the
+# current HEAD commit -- confirm against the helper module.
+get_git_head_revision(GIT_REFSPEC GIT_SHA3)
+
 SET( DEFAULT_HEADER_INSTALL_DIR include/\${target} )
 SET( DEFAULT_LIBRARY_INSTALL_DIR lib/ )
 SET( DEFAULT_EXECUTABLE_INSTALL_DIR bin/ )
@ -77,8 +81,9 @@ set( fc_sources
     src/thread/mutex.cpp
     src/asio.cpp
     src/string.cpp
-     src/shared_ptr.cpp 
-     src/time.cpp 
+     src/shared_ptr.cpp
+     src/time.cpp
+     src/utf8.cpp
     src/io/iostream.cpp
     src/io/datastream.cpp
     src/io/buffered_iostream.cpp
@ -130,6 +135,7 @@ set( fc_sources
 #     vendor/salsa20/ecrypt.c
     ${SALSA_SRC}
     )
+
     SET_PROPERTY( SOURCE 
                   vendor/salsa20/salsa20.s
                   PROPERTY LANGUAGE C)
@ -137,6 +143,10 @@ set( fc_sources
 set( sources
  ${fc_sources}
 )
+
+# Generate GitSHA3.cpp in the build tree from its template (only @VAR@
+# placeholders are expanded) and compile it, together with its header,
+# as part of the library sources.
+configure_file(
+  "${CMAKE_CURRENT_SOURCE_DIR}/GitSHA3.cpp.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/GitSHA3.cpp"
+  @ONLY
+)
+list(APPEND sources
+  "${CMAKE_CURRENT_BINARY_DIR}/GitSHA3.cpp"
+  GitSHA3.h
+)
+
 add_subdirectory( vendor/easylzma )

 setup_library( fc SOURCES ${sources} LIBRARY_TYPE STATIC )
--- a/GitSHA3.cpp.in
+++ b/GitSHA3.cpp.in
@ -0,0 +1,4 @@
+// Template processed by CMake configure_file(... @ONLY): the placeholder in
+// the #define below is replaced with the value of the GIT_SHA3 CMake variable.
+#include "GitSHA3.h"
+
+#define GIT_SHA3 "@GIT_SHA3@"
+// Definition of the revision string declared in GitSHA3.h.
+const char* const g_GIT_SHA3 = GIT_SHA3;
--- a/GitSHA3.h
+++ b/GitSHA3.h
@ -0,0 +1,8 @@
+// Include guard: names containing a double underscore are reserved for the
+// implementation, so a plain project-style guard name is used.
+#ifndef GITSHA3_H
+#define GITSHA3_H
+
+/// Git revision string; defined in GitSHA3.cpp, which CMake generates from
+/// GitSHA3.cpp.in at configure time.
+extern const char* const g_GIT_SHA3;
+
+/// Human-readable application version.
+#define APPLICATION_VERSION "1.0 Beta1"
+
+#endif /// GITSHA3_H
--- a/fc.natvis
+++ b/fc.natvis
@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- This file contains Debugger Visualizers for the FC library.  It's hard to navigate the
+	 FC objects in a debugger because the tricks used in classes like fc::fwd require manual
+	 casting to view the underlying objects in the debugger.  This file tells the Visual 
+	 Studio debugger how to see through these tricks and traverse the classes naturally.
+	 
+     To install, drop this file into your My Documents\Visual Studio 2012\Visualizers directory.
+	 
+     In addition, consider grabbing the .natvis files from the C++ Debugger Visualizers
+	 project here to improve display of boost objects:
+		https://github.com/KindDragon/CPPDebuggerVisualizers#c-debugger-visualizers-
+ -->
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+<Type Name="fc::variant">
+	<Expand>
+		<Item Name="value" Condition="(fc::variant::type_id)(*(((char*)(&amp;_data)) + sizeof(fc::variant) - 1)) == fc::variant::int64_type">*(const int64_t*)(&amp;_data)</Item>
+		<Item Name="value" Condition="(fc::variant::type_id)(*(((char*)(&amp;_data)) + sizeof(fc::variant) - 1)) == fc::variant::uint64_type">*(const uint64_t*)(&amp;_data)</Item>
+		<Item Name="value" Condition="(fc::variant::type_id)(*(((char*)(&amp;_data)) + sizeof(fc::variant) - 1)) == fc::variant::double_type">*(const double*)(&amp;_data)</Item>
+		<Item Name="value" Condition="(fc::variant::type_id)(*(((char*)(&amp;_data)) + sizeof(fc::variant) - 1)) == fc::variant::bool_type">*(const bool*)(&amp;_data)</Item>
+		<Item Name="value" Condition="(fc::variant::type_id)(*(((char*)(&amp;_data)) + sizeof(fc::variant) - 1)) == fc::variant::string_type">*(const const_string_ptr*)(&amp;_data)</Item>
+		<Item Name="value" Condition="(fc::variant::type_id)(*(((char*)(&amp;_data)) + sizeof(fc::variant) - 1)) == fc::variant::array_type">*(const const_variants_ptr*)(&amp;_data)</Item>
+		<Item Name="value" Condition="(fc::variant::type_id)(*(((char*)(&amp;_data)) + sizeof(fc::variant) - 1)) == fc::variant::object_type">*(const const_variant_object_ptr*)(&amp;_data)</Item>
+		<Item Name="type">(fc::variant::type_id)(*(((char*)(&amp;_data)) + sizeof(fc::variant) - 1))</Item>
+	</Expand>
+</Type>
+
+<Type Name="fc::fwd&lt;*&gt;">
+	<DisplayString>{*($T1*)(((_store)._store)._data)}</DisplayString>
+	<Expand>
+		<ExpandedItem>($T1*)(((_store)._store)._data)</ExpandedItem>
+	</Expand>
+</Type>
+
+<Type Name="fc::string">
+	<DisplayString>{my}</DisplayString>
+	<Expand>
+		<ExpandedItem>my</ExpandedItem>
+	</Expand>
+</Type>
+
+<Type Name="fc::optional&lt;*&gt;">
+    <DisplayString Condition="!_valid">invalid</DisplayString>
+    <DisplayString>valid, value = {*(($T1*)_value)}</DisplayString>
+    <Expand>
+        <Item Name="_value">($T1*)_value</Item>
+        <Item Name="_valid">_valid</Item>
+    </Expand>
+</Type>
+
+<Type Name="fc::path">
+	<DisplayString>{_p}</DisplayString>
+	<Expand>
+		<ExpandedItem>_p</ExpandedItem>
+	</Expand>
+</Type>
+
+</AutoVisualizer>
--- a/include/fc/filesystem.hpp
+++ b/include/fc/filesystem.hpp
@ -30,6 +30,8 @@ namespace fc {
      ~path();
      path( const boost::filesystem::path& );
      path( const fc::string& p );
+      /// Constructs a path from a wide-character (Unicode) string.
+      path(const std::wstring& p);
      path( const char* );
      path( const path& p );
      path( path&& p );
@ -45,13 +47,23 @@ namespace fc {
      operator boost::filesystem::path& ();
      operator const boost::filesystem::path& ()const;

-      void       replace_extension( const fc::path& e );
-      fc::path   stem()const;
-      fc::path   extension()const;
-      fc::path   filename()const;
-      fc::path   parent_path()const;
-      fc::string string()const;
-      fc::string generic_string()const;
+      void         replace_extension( const fc::path& e );
+      fc::path     stem()const;
+      fc::path     extension()const;
+      fc::path     filename()const;
+      fc::path     parent_path()const;
+      fc::string   string()const;
+      fc::string   generic_string()const;
+
+      std::wstring wstring() const;
+      std::wstring generic_wstring() const;
+
+      /** Retrieves the path as a wide string and then converts it to a
+          UTF-8 encoded std::string (on Windows the short 8.3 form of the
+          path is used when it can be obtained).
+          This is needed because not all parts of the fc library accept
+          Unicode (wide-character) paths, e.g. fc::file_mapping.
+      */
+      std::string  toNativeAnsiPath() const;

      /**
       * @brief replaces '/' with '\' in the result of generic_string()
--- a/include/fc/thread/task.hpp
+++ b/include/fc/thread/task.hpp
@ -13,7 +13,7 @@ namespace fc {
      void        run(); 
    protected:
      ~task_base();
-
+      /// NOTE: task priority appears to be an unsupported feature.
      uint64_t    _posted_num;
      priority    _prio;
      time_point  _when;
--- a/include/fc/utf8.hpp
+++ b/include/fc/utf8.hpp
@ -0,0 +1,26 @@
+// Include guard: names containing a double underscore are reserved for the
+// implementation, so a project-prefixed guard name is used.
+#ifndef FC_UTF8_HPP
+#define FC_UTF8_HPP
+
+#include <string>
+
+/// General purpose utilities for converting between UTF-8 and wide
+/// (Unicode) strings.
+
+namespace fc
+{
+
+/** Decodes a UTF-8 encoded std::string into a wide (Unicode) string.
+    @param input   - UTF-8 encoded text to decode.
+    @param storage - receives the decoded text; the result is appended to
+                     whatever the string already contains. Cannot be nullptr.
+*/
+void decodeUtf8(const std::string& input, std::wstring* storage);
+
+/** Encodes the given wide (Unicode) string into its UTF-8 representation.
+    @param input   - wide string to encode.
+    @param storage - receives the encoded text; the result is appended to
+                     whatever the string already contains. Cannot be nullptr.
+*/
+void encodeUtf8(const std::wstring& input, std::string* storage);
+
+} /// namespace fc
+
+#endif /// FC_UTF8_HPP
+
--- a/src/filesystem.cpp
+++ b/src/filesystem.cpp
@ -4,14 +4,29 @@
 #include <fc/fwd_impl.hpp>
 #include <fc/utility.hpp>
 #include <fc/io/fstream.hpp>
+
+#include <fc/utf8.hpp>
+#include <fc/variant.hpp>
+
 #include <boost/config.hpp>
 #include <boost/filesystem.hpp>
-#include <fc/variant.hpp>
+
+#ifdef WIN32
+  #include <windows.h>
+#endif

 namespace fc {
  void to_variant( const fc::path& t, variant& v ) {
-    v = t.generic_string();
+    // Store the path as the UTF-8 string produced by toNativeAnsiPath().
+    std::string path = t.toNativeAnsiPath();
+    // Normalize separators: every backslash becomes a forward slash so the
+    // stored value always uses '/'.
+    for(auto& c : path)
+      {
+      if(c == '\\')
+        c = '/';
+      }
+
+    v = path;
  }
+
  void from_variant( const fc::variant& v, fc::path& t ) {
    t = fc::path(v.as_string());
  }
@ -29,6 +44,9 @@ namespace fc {
   path::path( const fc::string& p )
   :_p(p.c_str()){}

+   /// Constructs the path from a wide string by forwarding it to the wrapped
+   /// path object _p.
+   path::path(const std::wstring& p)
+   :_p(p) {}
+
   path::path( const path& p )
   :_p(p){}

@ -67,6 +85,36 @@ namespace fc {
   fc::string path::generic_string()const {
    return _p->generic_string();
   }
+
+  /// Returns the path as a wide string; forwards to wstring() of the wrapped
+  /// path object.
+  std::wstring path::wstring() const
+    {
+    return _p->wstring();
+    }
+
+  /// Returns the path in generic format as a wide string; forwards to
+  /// generic_wstring() of the wrapped path object.
+  std::wstring path::generic_wstring() const
+    {
+    return _p->generic_wstring();
+    }
+
+  /** Converts the path to a UTF-8 encoded std::string.
+      On Windows the generic path is first replaced by its short (8.3) form
+      when GetShortPathNameW can provide one; if that call fails, the generic
+      path is encoded unchanged.
+  */
+  std::string path::toNativeAnsiPath() const
+    {
+    std::wstring path = generic_wstring();
+
+#ifdef WIN32
+    // Buffer capacity in wide characters passed to GetShortPathNameW.
+    const DWORD maxPath = 32*1024;
+    std::wstring shortPath(maxPath + 1, L'\0');
+
+    // GetShortPathNameW returns 0 on failure. When the buffer is too small it
+    // returns the required size instead (a value >= maxPath) and the buffer
+    // content must not be used, so only 0 < res < maxPath counts as success.
+    const DWORD res = GetShortPathNameW(path.c_str(), &shortPath[0], maxPath);
+    if(res != 0 && res < maxPath)
+      path.assign(shortPath.data(), res);
+#endif
+    std::string filePath;
+    fc::encodeUtf8(path, &filePath);
+    return filePath;
+    }
+
   /**
    *  @todo use iterators instead of indexes for 
    *  faster performance
--- a/src/interprocess/mmap_struct.cpp
+++ b/src/interprocess/mmap_struct.cpp
@ -1,6 +1,9 @@
 #include <fc/interprocess/mmap_struct.hpp>
+
 #include <fc/filesystem.hpp>
+
 #include <fc/io/fstream.hpp>
+
 #include <string.h>

 namespace fc
@ -29,7 +32,10 @@ namespace fc
               bytes_left -= to_write;
            }
         }
-         _file_mapping.reset( new fc::file_mapping( file.generic_string().c_str(), fc::read_write ) );
+
+         std::string filePath = file.toNativeAnsiPath();
+
+         _file_mapping.reset( new fc::file_mapping( filePath.c_str(), fc::read_write ) );
         _mapped_region.reset( new fc::mapped_region( *_file_mapping, fc::read_write, 0, s ) );
      }
   } // namespace fc
--- a/src/io/fstream.cpp
+++ b/src/io/fstream.cpp
@ -4,6 +4,7 @@
 #include <fc/exception/exception.hpp>
 #include <fc/log/logger.hpp>

+#include <boost/filesystem/path.hpp>

 namespace fc {
   class ofstream::impl : public fc::retainable {
@ -23,7 +24,8 @@ namespace fc {
   ofstream::~ofstream(){}

   void ofstream::open( const fc::path& file, int m ) {
-      my->ofs.open( file.string().c_str(), std::ios::binary );
+     // Bind to the underlying boost path (fc::path converts implicitly) and
+     // open the stream with its native() string instead of a narrow C string.
+     // NOTE(review): the mode argument 'm' is not used here; the stream is
+     // always opened with std::ios::binary.
+     const boost::filesystem::path& bfp = file; 
+     my->ofs.open( bfp.native(), std::ios::binary );
   }
   size_t ofstream::writesome( const char* buf, size_t len ) {
        my->ofs.write(buf,len);
@ -49,7 +51,8 @@ namespace fc {
   ifstream::~ifstream(){}

   void ifstream::open( const fc::path& file, int m ) {
-      my->ifs.open( file.string().c_str(), std::ios::binary );
+     // Bind to the underlying boost path (fc::path converts implicitly) and
+     // open the stream with its native() string instead of a narrow C string.
+     // NOTE(review): the mode argument 'm' is not used here; the stream is
+     // always opened with std::ios::binary.
+     const boost::filesystem::path& bfp = file; 
+      my->ifs.open( bfp.native(), std::ios::binary );
   }
   size_t ifstream::readsome( char* buf, size_t len ) {
      auto s = size_t(my->ifs.readsome( buf, len ));
--- a/src/io/json.cpp
+++ b/src/io/json.cpp
@ -580,7 +580,7 @@ namespace fc

   void          json::save_to_file( const variant& v, const fc::path& fi, bool pretty )
   {
-       fc::ofstream o( fi.generic_string().c_str() );
+       // Hand the fc::path itself to the stream rather than a narrow string
+       // built from it.
+       // NOTE(review): 'pretty' is not used in this function.
+       fc::ofstream o(fi);
        fc::to_stream( o, v );
   }
   variant json::from_file( const fc::path& p )
--- a/src/thread/task.cpp
+++ b/src/thread/task.cpp
@ -9,7 +9,12 @@

 namespace fc {
  task_base::task_base(void* func)
-  :_functor(func){
+  :
+  _posted_num(0),
+  _active_context(nullptr),
+  _next(nullptr),
+  _promise_impl(nullptr),
+  _functor(func){
  }

  void task_base::run() {
--- a/src/thread/thread_d.hpp
+++ b/src/thread/thread_d.hpp
@ -440,7 +440,7 @@ namespace fc {

         void unblock( fc::context* c ) {
             if(  fc::thread::current().my != this ) {
-               async( [=](){ unblock(c); } );
+               self.async( [=](){ unblock(c); } );
               return;
             }
               if( c != current ) ready_push_front(c); 
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@ -0,0 +1,28 @@
+#include "fc/utf8.hpp"
+
+#include "utf8/checked.h"
+#include "utf8/core.h"
+#include "utf8/unchecked.h"
+
+#include <assert.h>
+
+namespace fc
+{
+
+/** Decodes the UTF-8 text in 'input' and appends the result to '*storage'.
+    A std::wstring holds UTF-16 code units where wchar_t is 16 bits wide
+    (e.g. on Windows) and UTF-32 code points where it is 32 bits wide, so the
+    matching utf8cpp conversion is selected from sizeof(wchar_t).
+    The utf8cpp routines throw on malformed input.
+*/
+void decodeUtf8(const std::string& input, std::wstring* storage)
+  {
+  assert(storage != nullptr);
+
+  if(sizeof(wchar_t) == 2)
+    utf8::utf8to16(input.begin(), input.end(), std::back_inserter(*storage));
+  else
+    utf8::utf8to32(input.begin(), input.end(), std::back_inserter(*storage));
+  }
+
+/** Encodes the wide string 'input' as UTF-8 and appends it to '*storage'.
+    With a 16-bit wchar_t the input is treated as UTF-16, so surrogate pairs
+    are combined into a single code point before encoding; with a 32-bit
+    wchar_t each element is encoded as one code point.
+    The utf8cpp routines throw on invalid input.
+*/
+void encodeUtf8(const std::wstring& input, std::string* storage)
+  {
+  assert(storage != nullptr);
+
+  if(sizeof(wchar_t) == 2)
+    utf8::utf16to8(input.begin(), input.end(), std::back_inserter(*storage));
+  else
+    utf8::utf32to8(input.begin(), input.end(), std::back_inserter(*storage));
+  }
+
+} ///namespace fc
+
+
--- a/src/utf8/ReleaseNotes
+++ b/src/utf8/ReleaseNotes
@ -0,0 +1,12 @@
+utf8 cpp library
+Release 2.3.4
+
+A minor bug fix release. Thanks to all who reported bugs. 
+
+Note: Version 2.3.3 contained a regression, and therefore was removed.
+
+Changes from version 2.3.2
+- Bug fix [39]: checked.h Line 273 and unchecked.h Line 182 have an extra ';'
+- Bug fix [36]: replace_invalid() only works with back_inserter
+
+Files included in the release: utf8.h, core.h, checked.h, unchecked.h, utf8cpp.html, ReleaseNotes
--- a/src/utf8/checked.h
+++ b/src/utf8/checked.h
@ -0,0 +1,327 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+#include <stdexcept>
+
+namespace utf8
+{
+    // Base for the exceptions that may be thrown from the library
+    class exception : public ::std::exception {
+    };
+
+    // Exceptions that may be thrown from the library functions.
+    class invalid_code_point : public exception {
+        uint32_t cp;
+    public:
+        invalid_code_point(uint32_t cp) : cp(cp) {}
+        virtual const char* what() const throw() { return "Invalid code point"; }
+        uint32_t code_point() const {return cp;}
+    };
+
+    class invalid_utf8 : public exception {
+        uint8_t u8;
+    public:
+        invalid_utf8 (uint8_t u) : u8(u) {}
+        virtual const char* what() const throw() { return "Invalid UTF-8"; }
+        uint8_t utf8_octet() const {return u8;}
+    };
+
+    class invalid_utf16 : public exception {
+        uint16_t u16;
+    public:
+        invalid_utf16 (uint16_t u) : u16(u) {}
+        virtual const char* what() const throw() { return "Invalid UTF-16"; }
+        uint16_t utf16_word() const {return u16;}
+    };
+
+    class not_enough_room : public exception {
+    public:
+        virtual const char* what() const throw() { return "Not enough space"; }
+    };
+
+    /// The library API - functions intended to be called by the users
+
+    // Encodes the code point 'cp' as UTF-8, writing one to four octets through
+    // 'result', and returns the advanced output iterator.
+    // Throws invalid_code_point when internal::is_code_point_valid(cp) is false.
+    template <typename octet_iterator>
+    octet_iterator append(uint32_t cp, octet_iterator result)
+    {
+        if (!utf8::internal::is_code_point_valid(cp))
+            throw invalid_code_point(cp);
+
+        if (cp < 0x80)                        // one octet
+            *(result++) = static_cast<uint8_t>(cp);
+        else if (cp < 0x800) {                // two octets
+            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+        }
+        else if (cp < 0x10000) {              // three octets
+            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+        }
+        else {                                // four octets
+            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
+            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+        }
+        return result;
+    }
+
+    template <typename octet_iterator, typename output_iterator>
+    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
+    {
+        while (start != end) {
+            octet_iterator sequence_start = start;
+            internal::utf_error err_code = utf8::internal::validate_next(start, end);
+            switch (err_code) {
+                case internal::UTF8_OK :
+                    for (octet_iterator it = sequence_start; it != start; ++it)
+                        *out++ = *it;
+                    break;
+                case internal::NOT_ENOUGH_ROOM:
+                    throw not_enough_room();
+                case internal::INVALID_LEAD:
+                    out = utf8::append (replacement, out);
+                    ++start;
+                    break;
+                case internal::INCOMPLETE_SEQUENCE:
+                case internal::OVERLONG_SEQUENCE:
+                case internal::INVALID_CODE_POINT:
+                    out = utf8::append (replacement, out);
+                    ++start;
+                    // just one replacement mark for the sequence
+                    while (start != end && utf8::internal::is_trail(*start))
+                        ++start;
+                    break;
+            }
+        }
+        return out;
+    }
+
+    template <typename octet_iterator, typename output_iterator>
+    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
+    {
+        static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
+        return utf8::replace_invalid(start, end, out, replacement_marker);
+    }
+
+    // Decodes one UTF-8 sequence from [it, end) via internal::validate_next and
+    // returns its code point. 'it' is passed by reference to validate_next,
+    // which presumably advances it past the sequence on success.
+    // Throws not_enough_room, invalid_utf8 or invalid_code_point according to
+    // the error code reported by validate_next.
+    template <typename octet_iterator>
+    uint32_t next(octet_iterator& it, octet_iterator end)
+    {
+        uint32_t cp = 0;
+        internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
+        switch (err_code) {
+            case internal::UTF8_OK :
+                break;
+            case internal::NOT_ENOUGH_ROOM :
+                throw not_enough_room();
+            case internal::INVALID_LEAD :
+            case internal::INCOMPLETE_SEQUENCE :
+            case internal::OVERLONG_SEQUENCE :
+                throw invalid_utf8(*it);
+            case internal::INVALID_CODE_POINT :
+                throw invalid_code_point(cp);
+        }
+        return cp;
+    }
+
+    template <typename octet_iterator>
+    uint32_t peek_next(octet_iterator it, octet_iterator end)
+    {
+        return utf8::next(it, end);
+    }
+
+    template <typename octet_iterator>
+    uint32_t prior(octet_iterator& it, octet_iterator start)
+    {
+        // can't do much if it == start
+        if (it == start)
+            throw not_enough_room();
+
+        octet_iterator end = it;
+        // Go back until we hit either a lead octet or start
+        while (utf8::internal::is_trail(*(--it)))
+            if (it == start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+        return utf8::peek_next(it, end);
+    }
+
+    /// Deprecated in versions that include "prior"
+    template <typename octet_iterator>
+    uint32_t previous(octet_iterator& it, octet_iterator pass_start)
+    {
+        octet_iterator end = it;
+        while (utf8::internal::is_trail(*(--it)))
+            if (it == pass_start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+        octet_iterator temp = it;
+        return utf8::next(temp, end);
+    }
+
+    template <typename octet_iterator, typename distance_type>
+    void advance (octet_iterator& it, distance_type n, octet_iterator end)
+    {
+        for (distance_type i = 0; i < n; ++i)
+            utf8::next(it, end);
+    }
+
+    template <typename octet_iterator>
+    typename std::iterator_traits<octet_iterator>::difference_type
+    distance (octet_iterator first, octet_iterator last)
+    {
+        typename std::iterator_traits<octet_iterator>::difference_type dist;
+        for (dist = 0; first < last; ++dist)
+            utf8::next(first, last);
+        return dist;
+    }
+
+    // Converts the UTF-16 range [start, end) to UTF-8, writing the octets
+    // through 'result'; returns the advanced output iterator.
+    // A lead surrogate must be followed by a trail surrogate; otherwise, or
+    // when a trail surrogate appears on its own, invalid_utf16 is thrown.
+    template <typename u16bit_iterator, typename octet_iterator>
+    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+    {
+        while (start != end) {
+            uint32_t cp = utf8::internal::mask16(*start++);
+            // Take care of surrogate pairs first
+            if (utf8::internal::is_lead_surrogate(cp)) {
+                if (start != end) {
+                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
+                    if (utf8::internal::is_trail_surrogate(trail_surrogate))
+                        cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+                    else
+                        throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
+                }
+                else
+                    throw invalid_utf16(static_cast<uint16_t>(cp));
+
+            }
+            // Lone trail surrogate
+            else if (utf8::internal::is_trail_surrogate(cp))
+                throw invalid_utf16(static_cast<uint16_t>(cp));
+
+            result = utf8::append(cp, result);
+        }
+        return result;
+    }
+
+    // Converts the UTF-8 range [start, end) to UTF-16, writing 16-bit units
+    // through 'result'; returns the advanced output iterator.
+    // Code points above 0xffff are written as a surrogate pair (lead, then
+    // trail); decoding errors surface as the exceptions thrown by utf8::next.
+    template <typename u16bit_iterator, typename octet_iterator>
+    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+    {
+        while (start != end) {
+            uint32_t cp = utf8::next(start, end);
+            if (cp > 0xffff) { //make a surrogate pair
+                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+            }
+            else
+                *result++ = static_cast<uint16_t>(cp);
+        }
+        return result;
+    }
+
+    // Converts the range [start, end) of code points to UTF-8 by passing each
+    // element to utf8::append; returns the advanced output iterator.
+    // Errors surface as the exception thrown by utf8::append.
+    template <typename octet_iterator, typename u32bit_iterator>
+    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+    {
+        while (start != end)
+            result = utf8::append(*(start++), result);
+
+        return result;
+    }
+
+    // Converts the UTF-8 range [start, end) to code points, writing one value
+    // per decoded sequence through 'result'; returns the advanced output
+    // iterator. Errors surface as the exceptions thrown by utf8::next.
+    template <typename octet_iterator, typename u32bit_iterator>
+    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+    {
+        while (start != end)
+            (*result++) = utf8::next(start, end);
+
+        return result;
+    }
+
+    // The iterator class
+    template <typename octet_iterator>
+    class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
+      octet_iterator it;
+      octet_iterator range_start;
+      octet_iterator range_end;
+      public:
+      iterator () {}
+      explicit iterator (const octet_iterator& octet_it,
+                         const octet_iterator& range_start,
+                         const octet_iterator& range_end) :
+               it(octet_it), range_start(range_start), range_end(range_end)
+      {
+          if (it < range_start || it > range_end)
+              throw std::out_of_range("Invalid utf-8 iterator position");
+      }
+      // the default "big three" are OK
+      octet_iterator base () const { return it; }
+      uint32_t operator * () const
+      {
+          octet_iterator temp = it;
+          return utf8::next(temp, range_end);
+      }
+      bool operator == (const iterator& rhs) const
+      {
+          if (range_start != rhs.range_start || range_end != rhs.range_end)
+              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
+          return (it == rhs.it);
+      }
+      bool operator != (const iterator& rhs) const
+      {
+          return !(operator == (rhs));
+      }
+      iterator& operator ++ ()
+      {
+          utf8::next(it, range_end);
+          return *this;
+      }
+      iterator operator ++ (int)
+      {
+          iterator temp = *this;
+          utf8::next(it, range_end);
+          return temp;
+      }
+      iterator& operator -- ()
+      {
+          utf8::prior(it, range_start);
+          return *this;
+      }
+      iterator operator -- (int)
+      {
+          iterator temp = *this;
+          utf8::prior(it, range_start);
+          return temp;
+      }
+    }; // class iterator
+
+} // namespace utf8
+
+#endif //header guard
+
+
--- a/src/utf8/core.h
+++ b/src/utf8/core.h
@ -0,0 +1,329 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include <iterator>
+
+namespace utf8
+{
+    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
+    // You may need to change them to match your system.
+    // These typedefs have the same names as ones from cstdint, or boost/cstdint
+    typedef unsigned char   uint8_t;
+    typedef unsigned short  uint16_t;
+    typedef unsigned int    uint32_t;
+
+// Helper code - not intended to be directly called by the library users. May be changed at any time
+namespace internal
+{
+    // Unicode constants
+    // Leading (high) surrogates: 0xd800 - 0xdbff
+    // Trailing (low) surrogates: 0xdc00 - 0xdfff
+    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
+    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
+    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
+    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
+    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
+    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
+
+    // Maximum valid value for a Unicode code point
+    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
+
+    template<typename octet_type>
+    inline uint8_t mask8(octet_type oc)
+    {
+        return static_cast<uint8_t>(0xff & oc);
+    }
+    template<typename u16_type>
+    inline uint16_t mask16(u16_type oc)
+    {
+        return static_cast<uint16_t>(0xffff & oc);
+    }
+    template<typename octet_type>
+    inline bool is_trail(octet_type oc)
+    {
+        return ((utf8::internal::mask8(oc) >> 6) == 0x2);
+    }
+
+    template <typename u16>
+    inline bool is_lead_surrogate(u16 cp)
+    {
+        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
+    }
+
+    template <typename u16>
+    inline bool is_trail_surrogate(u16 cp)
+    {
+        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
+    }
+
+    template <typename u16>
+    inline bool is_surrogate(u16 cp)
+    {
+        return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
+    }
+
+    template <typename u32>
+    inline bool is_code_point_valid(u32 cp)
+    {
+        return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
+    }
+
+    template <typename octet_iterator>
+    inline typename std::iterator_traits<octet_iterator>::difference_type
+    sequence_length(octet_iterator lead_it)
+    {
+        uint8_t lead = utf8::internal::mask8(*lead_it);
+        if (lead < 0x80)
+            return 1;
+        else if ((lead >> 5) == 0x6)
+            return 2;
+        else if ((lead >> 4) == 0xe)
+            return 3;
+        else if ((lead >> 3) == 0x1e)
+            return 4;
+        else
+            return 0;
+    }
+
+    template <typename octet_difference_type>
+    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
+    {
+        if (cp < 0x80) {
+            if (length != 1) 
+                return true;
+        }
+        else if (cp < 0x800) {
+            if (length != 2) 
+                return true;
+        }
+        else if (cp < 0x10000) {
+            if (length != 3) 
+                return true;
+        }
+
+        return false;
+    }
+
+    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
+
+    /// Helper for get_sequence_x
+    template <typename octet_iterator>
+    utf_error increase_safely(octet_iterator& it, octet_iterator end)
+    {
+        if (++it == end)
+            return NOT_ENOUGH_ROOM;
+
+        if (!utf8::internal::is_trail(*it))
+            return INCOMPLETE_SEQUENCE;
+        
+        return UTF8_OK;
+    }
+
+    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}    
+
+    /// get_sequence_x functions decode utf-8 sequences of the length x
+    template <typename octet_iterator>
+    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        code_point = utf8::internal::mask8(*it);
+
+        return UTF8_OK;
+    }
+
+    template <typename octet_iterator>
+    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end) 
+            return NOT_ENOUGH_ROOM;
+        
+        code_point = utf8::internal::mask8(*it);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
+
+        return UTF8_OK;
+    }
+
+    /// Three-octet form. On UTF8_OK `it` is left on the last octet of the
+    /// sequence (it is not moved past it). Each macro invocation below may
+    /// return from this function early with the error produced by
+    /// increase_safely; in that case `code_point` holds a partial value.
+    template <typename octet_iterator>
+    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+            
+        // Start from the lead octet.
+        code_point = utf8::internal::mask8(*it);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Lead bits shifted by 12 (kept within 16 bits) plus the first trail
+        // octet shifted by 6 (kept within 12 bits).
+        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Low six bits come from the final trail octet.
+        code_point += (*it) & 0x3f;
+
+        return UTF8_OK;
+    }
+
+    /// Four-octet form. On UTF8_OK `it` is left on the last octet of the
+    /// sequence (it is not moved past it). Each macro invocation below may
+    /// return from this function early with the error produced by
+    /// increase_safely; in that case `code_point` holds a partial value.
+    template <typename octet_iterator>
+    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+           return NOT_ENOUGH_ROOM;
+
+        // Start from the lead octet.
+        code_point = utf8::internal::mask8(*it);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Lead bits shifted by 18 (kept within 21 bits) plus the first trail
+        // octet shifted by 12 (kept within 18 bits).
+        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Second trail octet shifted by 6 (kept within 12 bits).
+        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Low six bits come from the final trail octet.
+        code_point += (*it) & 0x3f;
+
+        return UTF8_OK;
+    }
+
+    // The stepping macro is not needed past the get_sequence_x functions; remove it.
+    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
+
+    /// Decodes and validates the UTF-8 sequence that starts at `it`.
+    ///
+    /// On success the decoded value is stored in `code_point`, `it` is left one
+    /// past the sequence and UTF8_OK is returned. On failure `it` keeps (or is
+    /// restored to) its initial value, `code_point` is not written, and the
+    /// reason is returned. An empty range (`it == end`) yields NOT_ENOUGH_ROOM.
+    template <typename octet_iterator>
+    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        // Nothing to read: stop before the lead octet is examined, because
+        // sequence_length() receives `it` alone and cannot know about `end`.
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        // Save the original value of it so we can go back in case of failure
+        // Of course, it does not make much sense with i.e. stream iterators
+        octet_iterator original_it = it;
+
+        uint32_t cp = 0;
+        // Determine the sequence length based on the lead octet
+        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
+        const octet_difference_type length = utf8::internal::sequence_length(it);
+
+        // Get trail octets and calculate the code point
+        utf_error err = UTF8_OK;
+        switch (length) {
+            case 0:
+                // `it` has not been moved yet, so there is nothing to restore.
+                return INVALID_LEAD;
+            case 1:
+                err = utf8::internal::get_sequence_1(it, end, cp);
+                break;
+            case 2:
+                err = utf8::internal::get_sequence_2(it, end, cp);
+                break;
+            case 3:
+                err = utf8::internal::get_sequence_3(it, end, cp);
+                break;
+            case 4:
+                err = utf8::internal::get_sequence_4(it, end, cp);
+                break;
+        }
+
+        if (err == UTF8_OK) {
+            // Decoding succeeded. Now, security checks...
+            if (utf8::internal::is_code_point_valid(cp)) {
+                if (!utf8::internal::is_overlong_sequence(cp, length)){
+                    // Passed! Return here.
+                    code_point = cp;
+                    // get_sequence_x leaves `it` on the last octet; step past it.
+                    ++it;
+                    return UTF8_OK;
+                }
+                else
+                    err = OVERLONG_SEQUENCE;
+            }
+            else
+                err = INVALID_CODE_POINT;
+        }
+
+        // Failure branch - restore the original value of the iterator
+        it = original_it;
+        return err;
+    }
+
+    /// Validation-only overload: forwards to the three-argument validate_next
+    /// and discards the decoded code point.
+    template <typename octet_iterator>
+    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
+        uint32_t discarded;
+        const utf_error status = utf8::internal::validate_next(it, end, discarded);
+        return status;
+    }
+
+} // namespace internal
+
+    /// The library API - functions intended to be called by the users
+
+    // Byte order mark: the three octets 0xEF 0xBB 0xBF that the BOM checks
+    // in this header compare the start of a sequence against.
+    const uint8_t bom[] = {0xef, 0xbb, 0xbf};
+
+    /// Scans [start, end) sequence by sequence and returns an iterator to the
+    /// start of the first sequence that fails validation, or `end` when every
+    /// sequence validates.
+    template <typename octet_iterator>
+    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
+    {
+        octet_iterator cursor = start;
+        while (cursor != end) {
+            // On failure validate_next leaves the iterator at the sequence start.
+            if (utf8::internal::validate_next(cursor, end) != internal::UTF8_OK)
+                break;
+        }
+        return cursor;
+    }
+
+    /// True when find_invalid reports no invalid sequence in [start, end).
+    template <typename octet_iterator>
+    inline bool is_valid(octet_iterator start, octet_iterator end)
+    {
+        const octet_iterator first_bad = utf8::find_invalid(start, end);
+        return first_bad == end;
+    }
+
+    /// True when [it, end) holds at least three octets and they equal `bom`.
+    /// The range end is checked before every octet is read.
+    template <typename octet_iterator>
+    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
+    {
+        if (it == end || utf8::internal::mask8(*it++) != bom[0])
+            return false;
+        if (it == end || utf8::internal::mask8(*it++) != bom[1])
+            return false;
+
+        return (it != end) && (utf8::internal::mask8(*it) == bom[2]);
+    }
+	
+    //Deprecated in release 2.3 
+    // Compares the three octets starting at `it` with `bom`. No range end is
+    // taken, so no bounds check is performed here.
+    template <typename octet_iterator>
+    inline bool is_bom (octet_iterator it)
+    {
+        if (utf8::internal::mask8(*it++) != bom[0])
+            return false;
+        if (utf8::internal::mask8(*it++) != bom[1])
+            return false;
+
+        return utf8::internal::mask8(*it) == bom[2];
+    }
+} // namespace utf8
+
+#endif // header guard
+
+
--- a/src/utf8/unchecked.h
+++ b/src/utf8/unchecked.h
@ -0,0 +1,228 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+
+namespace utf8
+{
+    namespace unchecked 
+    {
+        /// Writes the UTF-8 encoding of `cp` through `result` and returns the
+        /// iterator positioned after the last octet written. `cp` is not
+        /// validated; the octet count is chosen purely from its magnitude.
+        template <typename octet_iterator>
+        octet_iterator append(uint32_t cp, octet_iterator result)
+        {
+            if (cp < 0x80) {
+                // one octet
+                *(result++) = static_cast<uint8_t>(cp);
+                return result;
+            }
+
+            if (cp < 0x800) {
+                // two octets
+                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0);
+                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
+                return result;
+            }
+
+            if (cp < 0x10000) {
+                // three octets
+                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0);
+                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
+                return result;
+            }
+
+            // four octets
+            *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0);
+            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
+            return result;
+        }
+
+        /// Decodes the sequence starting at `it`, advances `it` past it and
+        /// returns the code point. There is no end iterator and no validation.
+        /// For a length other than 2, 3 or 4 the masked lead octet is returned
+        /// and `it` moves by one.
+        template <typename octet_iterator>
+        uint32_t next(octet_iterator& it)
+        {
+            typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
+
+            uint32_t cp = utf8::internal::mask8(*it);
+            const octet_difference_type length = utf8::internal::sequence_length(it);
+
+            if (length == 2) {
+                ++it;
+                cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
+            }
+            else if (length == 3) {
+                ++it;
+                cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+                ++it;
+                cp += (*it) & 0x3f;
+            }
+            else if (length == 4) {
+                ++it;
+                cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+                ++it;
+                cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
+                ++it;
+                cp += (*it) & 0x3f;
+            }
+
+            // Step off the last octet of the sequence.
+            ++it;
+            return cp;
+        }
+
+        /// Returns the code point at `it`. The iterator is taken by value, so
+        /// the caller's iterator is not moved.
+        template <typename octet_iterator>
+        uint32_t peek_next(octet_iterator it)
+        {
+            const uint32_t cp = utf8::unchecked::next(it);
+            return cp;
+        }
+
+        /// Moves `it` backwards to the nearest octet that is not a trail octet
+        /// and returns the code point decoded from there. No lower bound is
+        /// checked while stepping back.
+        template <typename octet_iterator>
+        uint32_t prior(octet_iterator& it)
+        {
+            do {
+                --it;
+            } while (utf8::internal::is_trail(*it));
+
+            // Decode from a copy so `it` stays on the start of the sequence.
+            octet_iterator reader = it;
+            return utf8::unchecked::next(reader);
+        }
+
+        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
+        // Plain forwarder to utf8::unchecked::prior.
+        template <typename octet_iterator>
+        inline uint32_t previous(octet_iterator& it)
+        {
+            const uint32_t cp = utf8::unchecked::prior(it);
+            return cp;
+        }
+
+        /// Moves `it` forward by `n` code points by calling next() `n` times.
+        /// Nothing happens when `n` is not greater than zero.
+        template <typename octet_iterator, typename distance_type>
+        void advance (octet_iterator& it, distance_type n)
+        {
+            for (distance_type step = 0; step < n; ++step) {
+                utf8::unchecked::next(it);
+            }
+        }
+
+        /// Counts the code points decoded while walking from `first` until it
+        /// is no longer less than `last`.
+        template <typename octet_iterator>
+        typename std::iterator_traits<octet_iterator>::difference_type
+        distance (octet_iterator first, octet_iterator last)
+        {
+            typename std::iterator_traits<octet_iterator>::difference_type count = 0;
+            while (first < last) {
+                utf8::unchecked::next(first);
+                ++count;
+            }
+            return count;
+        }
+
+        /// Converts the UTF-16 units in [start, end) to UTF-8 written through
+        /// `result`; returns the output iterator after the last octet. After a
+        /// lead surrogate the following unit is consumed without checking `end`
+        /// or that it is a trail surrogate.
+        template <typename u16bit_iterator, typename octet_iterator>
+        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+        {
+            while (start != end) {
+                uint32_t cp = utf8::internal::mask16(*start);
+                ++start;
+
+                // Take care of surrogate pairs first
+                if (utf8::internal::is_lead_surrogate(cp)) {
+                    const uint32_t trail_surrogate = utf8::internal::mask16(*start);
+                    ++start;
+                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+                }
+
+                result = utf8::unchecked::append(cp, result);
+            }
+            return result;
+        }
+
+        /// Converts the UTF-8 octets in [start, end) to UTF-16 units written
+        /// through `result`; returns the output iterator after the last unit.
+        /// Code points above 0xffff are emitted as a surrogate pair.
+        template <typename u16bit_iterator, typename octet_iterator>
+        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+        {
+            while (start < end) {
+                const uint32_t cp = utf8::unchecked::next(start);
+                if (cp <= 0xffff) {
+                    *result++ = static_cast<uint16_t>(cp);
+                    continue;
+                }
+
+                // make a surrogate pair
+                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+            }
+            return result;
+        }
+
+        /// Encodes every code point in [start, end) as UTF-8 through `result`
+        /// and returns the output iterator after the last octet written.
+        template <typename octet_iterator, typename u32bit_iterator>
+        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+        {
+            for (; start != end; ++start) {
+                result = utf8::unchecked::append(*start, result);
+            }
+
+            return result;
+        }
+
+        /// Decodes the UTF-8 octets in [start, end) and writes one code point
+        /// per sequence through `result`; returns the output iterator after the
+        /// last code point written.
+        template <typename octet_iterator, typename u32bit_iterator>
+        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+        {
+            while (start < end) {
+                const uint32_t cp = utf8::unchecked::next(start);
+                *result = cp;
+                ++result;
+            }
+
+            return result;
+        }
+
+        // The iterator class
+        // Bidirectional adaptor over an octet iterator: dereferencing yields the
+        // code point at the current position, ++/-- move by whole sequences.
+        // All operations go through the unchecked helpers, so nothing is validated.
+        template <typename octet_iterator>
+          class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
+            octet_iterator it;
+
+            public:
+            iterator () {}
+            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
+            // the default "big three" are OK
+
+            // Underlying octet iterator at the current position.
+            octet_iterator base () const { return it; }
+
+            // Decodes from a copy so the stored position does not move.
+            uint32_t operator * () const
+            {
+                octet_iterator reader = it;
+                return utf8::unchecked::next(reader);
+            }
+
+            bool operator == (const iterator& rhs) const
+            {
+                return (it == rhs.it);
+            }
+
+            bool operator != (const iterator& rhs) const
+            {
+                return !(it == rhs.it);
+            }
+
+            // Pre-increment: skip the whole sequence at the current position.
+            iterator& operator ++ ()
+            {
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return *this;
+            }
+
+            // Post-increment: returns the position held before the move.
+            iterator operator ++ (int)
+            {
+                iterator snapshot(*this);
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return snapshot;
+            }
+
+            // Pre-decrement: step back to the start of the preceding sequence.
+            iterator& operator -- ()
+            {
+                utf8::unchecked::prior(it);
+                return *this;
+            }
+
+            // Post-decrement: returns the position held before the move.
+            iterator operator -- (int)
+            {
+                iterator snapshot(*this);
+                utf8::unchecked::prior(it);
+                return snapshot;
+            }
+          }; // class iterator
+
+    } // namespace utf8::unchecked
+} // namespace utf8 
+
+
+#endif // header guard
+
--- a/src/utf8/utf8cpp.html
+++ b/src/utf8/utf8cpp.html