Merge branch 'master' of https://github.com/cryptonomex/fc
This commit is contained in:
commit
e2e0f38dc6
115 changed files with 488 additions and 22831 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -15,13 +15,13 @@
|
|||
*.a
|
||||
*.lib
|
||||
|
||||
#CMake->MSVC artifacts
|
||||
# CMake->MSVC artifacts
|
||||
*.sln
|
||||
*.vcxproj
|
||||
ALL_BUILD
|
||||
ZERO_CHECK
|
||||
|
||||
#MSVC secondary artifacts
|
||||
# MSVC secondary artifacts
|
||||
*.suo
|
||||
*.vcxproj.filters
|
||||
*.vcxproj.user
|
||||
|
|
@ -48,7 +48,6 @@ fc_automoc.cpp
|
|||
git_revision.cpp
|
||||
GitSHA3.cpp
|
||||
|
||||
lzma_test
|
||||
ntp_test
|
||||
task_cancel_test
|
||||
udt_client
|
||||
|
|
|
|||
109
CMakeLists.txt
109
CMakeLists.txt
|
|
@ -50,48 +50,45 @@ ELSE( ECC_IMPL STREQUAL openssl )
|
|||
ENDIF( ECC_IMPL STREQUAL openssl )
|
||||
|
||||
# Configure secp256k1-zkp
|
||||
|
||||
set( SECP256K1_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor/secp256k1-zkp" )
|
||||
|
||||
file( GLOB SECP256K1_SOURCES "${SECP256K1_DIR}/src/secp256k1.c" )
|
||||
add_library( secp256k1 ${SECP256K1_SOURCES} )
|
||||
|
||||
target_include_directories( secp256k1 PRIVATE "${SECP256K1_DIR}" PUBLIC "${SECP256K1_DIR}/include" )
|
||||
|
||||
if ( WIN32 )
|
||||
# autoconf won't work here, hard code the defines
|
||||
set( SECP256K1_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor/secp256k1-zkp" )
|
||||
|
||||
if( WIN32 )
|
||||
set( SECP256K1_BUILD_DEFINES
|
||||
USE_FIELD_10X26
|
||||
USE_FIELD_INV_BUILTIN
|
||||
USE_NUM_NONE
|
||||
USE_SCALAR_8X32
|
||||
USE_SCALAR_INV_BUILTIN )
|
||||
else()
|
||||
# ***Will only work for Clang on 64-bit Mac/Linux***
|
||||
set( SECP256K1_BUILD_DEFINES
|
||||
HAVE_BUILTIN_CLZLL
|
||||
HAVE_BUILTIN_EXPECT
|
||||
HAVE_DLFCN_H
|
||||
HAVE_INTTYPES_H
|
||||
HAVE_LIBCRYPTO
|
||||
HAVE_MEMORY_H
|
||||
HAVE_STDINT_H
|
||||
HAVE_STDLIB_H
|
||||
HAVE_STRINGS_H
|
||||
HAVE_STRING_H
|
||||
HAVE_SYS_STAT_H
|
||||
HAVE_SYS_TYPES_H
|
||||
HAVE_UNISTD_H
|
||||
HAVE___INT128
|
||||
STDC_HEADERS
|
||||
USE_FIELD_5X52
|
||||
USE_FIELD_INV_BUILTIN
|
||||
USE_NUM_NONE
|
||||
USE_SCALAR_4X64
|
||||
USE_SCALAR_INV_BUILTIN
|
||||
)
|
||||
endif()
|
||||
set_target_properties( secp256k1 PROPERTIES COMPILE_DEFINITIONS "${SECP256K1_BUILD_DEFINES}" LINKER_LANGUAGE C )
|
||||
file( GLOB SECP256K1_SOURCES "${SECP256K1_DIR}/src/secp256k1.c" )
|
||||
add_library( secp256k1 ${SECP256K1_SOURCES} )
|
||||
|
||||
target_include_directories( secp256k1 PRIVATE "${SECP256K1_DIR}" PUBLIC "${SECP256K1_DIR}/include" )
|
||||
|
||||
set( SECP256K1_BUILD_DEFINES
|
||||
USE_FIELD_10X26
|
||||
USE_FIELD_INV_BUILTIN
|
||||
USE_NUM_NONE
|
||||
USE_SCALAR_8X32
|
||||
USE_SCALAR_INV_BUILTIN )
|
||||
set_target_properties( secp256k1 PROPERTIES COMPILE_DEFINITIONS "${SECP256K1_BUILD_DEFINES}" LINKER_LANGUAGE C )
|
||||
else ( WIN32 )
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add( project_secp256k1
|
||||
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/vendor/secp256k1-zkp
|
||||
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/vendor/secp256k1-zkp
|
||||
CONFIGURE_COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/vendor/secp256k1-zkp/configure --prefix=${CMAKE_CURRENT_BINARY_DIR}/vendor/secp256k1-zkp --with-bignum=no
|
||||
BUILD_COMMAND make
|
||||
INSTALL_COMMAND true
|
||||
BUILD_BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/vendor/secp256k1-zkp/src/project_secp256k1-build/.libs/libsecp256k1.a
|
||||
)
|
||||
ExternalProject_Add_Step(project_secp256k1 autogen
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/vendor/secp256k1-zkp
|
||||
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/vendor/secp256k1-zkp/autogen.sh
|
||||
DEPENDERS configure
|
||||
)
|
||||
|
||||
ExternalProject_Get_Property(project_secp256k1 binary_dir)
|
||||
|
||||
add_library(secp256k1 STATIC IMPORTED)
|
||||
set_property(TARGET secp256k1 PROPERTY IMPORTED_LOCATION ${binary_dir}/.libs/libsecp256k1${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
set_property(TARGET secp256k1 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/vendor/secp256k1-zkp/include)
|
||||
add_dependencies(secp256k1 project_secp256k1)
|
||||
endif ( WIN32 )
|
||||
# End configure secp256k1-zkp
|
||||
|
||||
IF( WIN32 )
|
||||
|
|
@ -128,7 +125,6 @@ ELSE(WIN32)
|
|||
|
||||
IF(NOT APPLE)
|
||||
# Linux or other unix
|
||||
SET(SALSA_SRC vendor/salsa20/salsa20.s)
|
||||
SET(rt_library rt )
|
||||
SET(pthread_library pthread)
|
||||
ENDIF(NOT APPLE)
|
||||
|
|
@ -150,7 +146,6 @@ set( CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_LIB_SUFFIXES} )
|
|||
option( UNITY_BUILD OFF )
|
||||
|
||||
set( fc_sources
|
||||
src/rpc/state.cpp
|
||||
src/uint128.cpp
|
||||
src/real128.cpp
|
||||
src/variant.cpp
|
||||
|
|
@ -182,8 +177,11 @@ set( fc_sources
|
|||
src/interprocess/signals.cpp
|
||||
src/interprocess/file_mapping.cpp
|
||||
src/interprocess/mmap_struct.cpp
|
||||
src/rpc/json_connection.cpp
|
||||
src/rpc/cli.cpp
|
||||
src/rpc/http_api.cpp
|
||||
src/rpc/json_connection.cpp
|
||||
src/rpc/state.cpp
|
||||
src/rpc/websocket_api.cpp
|
||||
src/log/log_message.cpp
|
||||
src/log/logger.cpp
|
||||
src/log/appender.cpp
|
||||
|
|
@ -212,8 +210,6 @@ set( fc_sources
|
|||
${ECC_REST}
|
||||
src/crypto/elliptic_${ECC_IMPL}.cpp
|
||||
src/crypto/rand.cpp
|
||||
src/crypto/salsa20.cpp
|
||||
#src/crypto/scrypt.cpp
|
||||
src/network/tcp_socket.cpp
|
||||
src/network/udp_socket.cpp
|
||||
src/network/udt_socket.cpp
|
||||
|
|
@ -228,17 +224,10 @@ set( fc_sources
|
|||
src/network/gntp.cpp
|
||||
src/compress/smaz.cpp
|
||||
src/compress/zlib.cpp
|
||||
src/compress/lzma.cpp
|
||||
vendor/cyoencode-1.0.2/src/CyoDecode.c
|
||||
vendor/cyoencode-1.0.2/src/CyoEncode.c
|
||||
#vendor/salsa20/ecrypt.c
|
||||
${SALSA_SRC}
|
||||
)
|
||||
|
||||
SET_PROPERTY( SOURCE
|
||||
vendor/salsa20/salsa20.s
|
||||
PROPERTY LANGUAGE C)
|
||||
|
||||
file( GLOB_RECURSE fc_headers ${CMAKE_CURRENT_SOURCE_DIR} *.hpp *.h )
|
||||
|
||||
set( sources
|
||||
|
|
@ -249,9 +238,7 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/git_revision.cpp.in" "${CMAKE_CU
|
|||
list(APPEND sources "${CMAKE_CURRENT_BINARY_DIR}/git_revision.cpp")
|
||||
list(APPEND sources ${fc_headers})
|
||||
|
||||
add_subdirectory( vendor/easylzma )
|
||||
add_subdirectory( vendor/websocketpp )
|
||||
#add_subdirectory( vendor/scrypt-jane )
|
||||
add_subdirectory( vendor/udt4 )
|
||||
|
||||
setup_library( fc SOURCES ${sources} LIBRARY_TYPE STATIC DONT_INSTALL_LIBRARY )
|
||||
|
|
@ -280,7 +267,7 @@ IF(WIN32)
|
|||
_SCL_SERCURE_NO_WARNINGS
|
||||
# Needed to disable MSVC autolinking feature (#pragma comment)
|
||||
BOOST_ALL_NO_LIB
|
||||
# The current version of websockets doesn't correctly guess what 'chrono' implementation boost::asio uses
|
||||
# The current version of websockets doesn't correctly guess what 'chrono' implementation boost::asio uses
|
||||
# on the recommended build platform of VC++12/boost_1.58. Force it here until websocket gets their
|
||||
# autodetecting code to do the right thing.
|
||||
_WEBSOCKETPP_CPP11_CHRONO_
|
||||
|
|
@ -310,16 +297,13 @@ target_include_directories(fc
|
|||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vendor/boost_1.51/include
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vendor/cyoencode-1.0.2/src
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vendor/easylzma/src
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vendor/salsa20
|
||||
#${CMAKE_CURRENT_SOURCE_DIR}/vendor/scrypt-jane
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vendor/udt4/src
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vendor/websocketpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vendor/secp256k1-zkp
|
||||
)
|
||||
|
||||
#target_link_libraries( fc PUBLIC easylzma_static scrypt udt ${Boost_LIBRARIES} ${OPENSSL_LIBRARIES} ${ZLIB_LIBRARIES} ${PLATFORM_SPECIFIC_LIBS} ${RPCRT4} ${CMAKE_DL_LIBS} ${rt_library} ${ECC_LIB} )
|
||||
target_link_libraries( fc PUBLIC -L/usr/local/lib easylzma_static udt ${Boost_LIBRARIES} ${OPENSSL_LIBRARIES} ${ZLIB_LIBRARIES} ${PLATFORM_SPECIFIC_LIBS} ${RPCRT4} ${CMAKE_DL_LIBS} ${rt_library} ${readline_libraries} ${ECC_LIB} )
|
||||
#target_link_libraries( fc PUBLIC udt ${Boost_LIBRARIES} ${OPENSSL_LIBRARIES} ${ZLIB_LIBRARIES} ${PLATFORM_SPECIFIC_LIBS} ${RPCRT4} ${CMAKE_DL_LIBS} ${rt_library} ${ECC_LIB} )
|
||||
target_link_libraries( fc PUBLIC -L/usr/local/lib udt ${Boost_LIBRARIES} ${OPENSSL_LIBRARIES} ${ZLIB_LIBRARIES} ${PLATFORM_SPECIFIC_LIBS} ${RPCRT4} ${CMAKE_DL_LIBS} ${rt_library} ${readline_libraries} ${ECC_LIB} )
|
||||
|
||||
if(MSVC)
|
||||
set_source_files_properties( src/network/http/websocket.cpp PROPERTIES COMPILE_FLAGS "/bigobj" )
|
||||
|
|
@ -364,9 +348,6 @@ target_link_libraries( udt_server fc udt )
|
|||
add_executable( udt_client tests/udtc.cpp )
|
||||
target_link_libraries( udt_client fc udt )
|
||||
|
||||
add_executable( lzma_test tests/lzma_test.cpp )
|
||||
target_link_libraries( lzma_test fc )
|
||||
|
||||
add_executable( ecc_test tests/ecc_test.cpp )
|
||||
target_link_libraries( ecc_test fc )
|
||||
|
||||
|
|
|
|||
28
include/fc/bitutil.hpp
Normal file
28
include/fc/bitutil.hpp
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
namespace fc {

/**
 * Returns @p x with the order of its eight bytes reversed
 * (byte 0 becomes byte 7, byte 1 becomes byte 6, and so on).
 *
 * Each source byte is isolated with a shift-and-mask and then moved to its
 * mirrored position; the operation is its own inverse.
 */
inline uint64_t endian_reverse_u64( uint64_t x )
{
   return (((x >> 0x38) & 0xFF)        )
        | (((x >> 0x30) & 0xFF) << 0x08)
        | (((x >> 0x28) & 0xFF) << 0x10)
        | (((x >> 0x20) & 0xFF) << 0x18)
        | (((x >> 0x18) & 0xFF) << 0x20)
        | (((x >> 0x10) & 0xFF) << 0x28)
        | (((x >> 0x08) & 0xFF) << 0x30)
        | (((x        ) & 0xFF) << 0x38)
        ;
}

/**
 * Returns @p x with the order of its four bytes reversed
 * (byte 0 becomes byte 3, byte 1 becomes byte 2, and so on).
 *
 * Same shift-and-mask scheme as endian_reverse_u64, restricted to 32 bits.
 */
inline uint32_t endian_reverse_u32( uint32_t x )
{
   return (((x >> 0x18) & 0xFF)        )
        | (((x >> 0x10) & 0xFF) << 0x08)
        | (((x >> 0x08) & 0xFF) << 0x10)
        | (((x        ) & 0xFF) << 0x18)
        ;
}

} // namespace fc
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <fc/filesystem.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace fc {
|
||||
|
||||
std::vector<char> lzma_compress( const std::vector<char>& in );
|
||||
std::vector<char> lzma_decompress( const std::vector<char>& compressed );
|
||||
|
||||
void lzma_compress_file( const path& src_path,
|
||||
const path& dst_path,
|
||||
unsigned char level = 5,
|
||||
unsigned int dict_size = (1 << 20) );
|
||||
|
||||
void lzma_decompress_file( const path& src_path,
|
||||
const path& dst_path );
|
||||
|
||||
} // namespace fc
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
#pragma once
|
||||
#include <fc/crypto/sha256.hpp>
|
||||
|
||||
namespace fc
|
||||
{
|
||||
void salsa20_encrypt( const fc::sha256& key, uint64_t iv, const char* plain, char* cipher, uint64_t len );
|
||||
void salsa20_decrypt( const fc::sha256& key, uint64_t iv, const char* cipher, char* plain, uint64_t len );
|
||||
}
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
#pragma once
|
||||
#include <vector>
|
||||
|
||||
namespace fc {
|
||||
|
||||
void scrypt_derive_key( const std::vector<unsigned char>& passphrase, const std::vector<unsigned char>& salt,
|
||||
unsigned int n, unsigned int r, unsigned int p, std::vector<unsigned char>& key );
|
||||
|
||||
} // namespace fc
|
||||
|
|
@ -689,7 +689,7 @@ namespace fc { namespace json_relaxed
|
|||
{
|
||||
skip_white_space(in);
|
||||
variant var;
|
||||
while( char c = in.peek() )
|
||||
while( signed char c = in.peek() )
|
||||
{
|
||||
switch( c )
|
||||
{
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ namespace fc {
|
|||
|
||||
private:
|
||||
class impl;
|
||||
fwd<impl,368> my;
|
||||
fwd<impl, 392> my;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ class file_appender : public appender {
|
|||
bool rotate = false;
|
||||
microseconds rotation_interval;
|
||||
microseconds rotation_limit;
|
||||
bool rotation_compression = false;
|
||||
};
|
||||
file_appender( const variant& args );
|
||||
~file_appender();
|
||||
|
|
@ -32,4 +31,4 @@ class file_appender : public appender {
|
|||
|
||||
#include <fc/reflect/reflect.hpp>
|
||||
FC_REFLECT( fc::file_appender::config,
|
||||
(format)(filename)(flush)(rotate)(rotation_interval)(rotation_limit)(rotation_compression) )
|
||||
(format)(filename)(flush)(rotate)(rotation_interval)(rotation_limit) )
|
||||
|
|
|
|||
|
|
@ -16,83 +16,25 @@ namespace fc { namespace rpc {
|
|||
class cli : public api_connection
|
||||
{
|
||||
public:
|
||||
~cli()
|
||||
{
|
||||
if( _run_complete.valid() )
|
||||
{
|
||||
stop();
|
||||
}
|
||||
}
|
||||
virtual variant send_call( api_id_type api_id, string method_name, variants args = variants() )
|
||||
{
|
||||
FC_ASSERT(false);
|
||||
}
|
||||
virtual variant send_callback( uint64_t callback_id, variants args = variants() )
|
||||
{
|
||||
FC_ASSERT(false);
|
||||
}
|
||||
virtual void send_notice( uint64_t callback_id, variants args = variants() )
|
||||
{
|
||||
FC_ASSERT(false);
|
||||
}
|
||||
~cli();
|
||||
|
||||
void start()
|
||||
{
|
||||
_run_complete = fc::async( [&](){ run(); } );
|
||||
}
|
||||
void stop()
|
||||
{
|
||||
_run_complete.cancel();
|
||||
_run_complete.wait();
|
||||
}
|
||||
void wait(){ _run_complete.wait(); }
|
||||
void format_result( const string& method, std::function<string(variant,const variants&)> formatter)
|
||||
{
|
||||
_result_formatters[method] = formatter;
|
||||
}
|
||||
virtual variant send_call( api_id_type api_id, string method_name, variants args = variants() );
|
||||
virtual variant send_callback( uint64_t callback_id, variants args = variants() );
|
||||
virtual void send_notice( uint64_t callback_id, variants args = variants() );
|
||||
|
||||
void start();
|
||||
void stop();
|
||||
void wait();
|
||||
void format_result( const string& method, std::function<string(variant,const variants&)> formatter);
|
||||
|
||||
virtual void getline( const fc::string& prompt, fc::string& line );
|
||||
|
||||
void set_prompt( const string& prompt ) { _prompt = prompt; }
|
||||
void set_prompt( const string& prompt );
|
||||
|
||||
private:
|
||||
void run()
|
||||
{
|
||||
while( !_run_complete.canceled() )
|
||||
{
|
||||
try {
|
||||
std::string line;
|
||||
try
|
||||
{
|
||||
getline( _prompt.c_str(), line );
|
||||
}
|
||||
catch ( const fc::eof_exception& e )
|
||||
{
|
||||
break;
|
||||
}
|
||||
std::cout << line << "\n";
|
||||
line += char(EOF);
|
||||
fc::variants args = fc::json::variants_from_string(line);;
|
||||
if( args.size() == 0 ) continue;
|
||||
|
||||
const string& method = args[0].get_string();
|
||||
void run();
|
||||
|
||||
auto result = receive_call( 0, method, variants( args.begin()+1,args.end() ) );
|
||||
auto itr = _result_formatters.find( method );
|
||||
if( itr == _result_formatters.end() )
|
||||
{
|
||||
std::cout << fc::json::to_pretty_string( result ) << "\n";
|
||||
}
|
||||
else
|
||||
std::cout << itr->second( result, args ) << "\n";
|
||||
}
|
||||
catch ( const fc::exception& e )
|
||||
{
|
||||
std::cout << e.to_detail_string() << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string _prompt = ">>>";
|
||||
std::string _prompt = ">>>";
|
||||
std::map<string,std::function<string(variant,const variants&)> > _result_formatters;
|
||||
fc::future<void> _run_complete;
|
||||
};
|
||||
|
|
|
|||
35
include/fc/rpc/http_api.hpp
Normal file
35
include/fc/rpc/http_api.hpp
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
#pragma once
|
||||
#include <fc/io/json.hpp>
|
||||
#include <fc/network/http/connection.hpp>
|
||||
#include <fc/network/http/server.hpp>
|
||||
#include <fc/reflect/variant.hpp>
|
||||
#include <fc/rpc/api_connection.hpp>
|
||||
#include <fc/rpc/state.hpp>
|
||||
|
||||
namespace fc { namespace rpc {
|
||||
|
||||
class http_api_connection : public api_connection
|
||||
{
|
||||
public:
|
||||
http_api_connection();
|
||||
~http_api_connection();
|
||||
|
||||
virtual variant send_call(
|
||||
api_id_type api_id,
|
||||
string method_name,
|
||||
variants args = variants() ) override;
|
||||
virtual variant send_callback(
|
||||
uint64_t callback_id,
|
||||
variants args = variants() ) override;
|
||||
virtual void send_notice(
|
||||
uint64_t callback_id,
|
||||
variants args = variants() ) override;
|
||||
|
||||
void on_request(
|
||||
const fc::http::request& req,
|
||||
const fc::http::server::response& resp );
|
||||
|
||||
fc::rpc::state _rpc_state;
|
||||
};
|
||||
|
||||
} } // namespace fc::rpc
|
||||
|
|
@ -10,104 +10,25 @@ namespace fc { namespace rpc {
|
|||
class websocket_api_connection : public api_connection
|
||||
{
|
||||
public:
|
||||
~websocket_api_connection()
|
||||
{
|
||||
}
|
||||
|
||||
websocket_api_connection( fc::http::websocket_connection& c )
|
||||
:_connection(c)
|
||||
{
|
||||
_rpc_state.add_method( "call", [this]( const variants& args ) -> variant {
|
||||
FC_ASSERT( args.size() == 3 && args[2].is_array() );
|
||||
return this->receive_call( args[0].as_uint64(),
|
||||
args[1].as_string(),
|
||||
args[2].get_array() );
|
||||
});
|
||||
|
||||
_rpc_state.add_method( "notice", [this]( const variants& args ) -> variant {
|
||||
FC_ASSERT( args.size() == 2 && args[1].is_array() );
|
||||
this->receive_notice( args[0].as_uint64(), args[1].get_array() );
|
||||
return variant();
|
||||
});
|
||||
|
||||
_rpc_state.add_method( "callback", [this]( const variants& args ) -> variant {
|
||||
FC_ASSERT( args.size() == 2 && args[1].is_array() );
|
||||
this->receive_callback( args[0].as_uint64(), args[1].get_array() );
|
||||
return variant();
|
||||
});
|
||||
|
||||
_rpc_state.on_unhandled( [&]( const std::string& method_name, const variants& args ){
|
||||
return this->receive_call( 0, method_name, args );
|
||||
});
|
||||
|
||||
_connection.on_message_handler( [&]( const std::string& msg ){ on_message(msg,true); } );
|
||||
_connection.on_http_handler( [&]( const std::string& msg ){ return on_message(msg,false); } );
|
||||
_connection.closed.connect( [this](){ closed(); } );
|
||||
}
|
||||
|
||||
virtual variant send_call( api_id_type api_id,
|
||||
string method_name,
|
||||
variants args = variants() ) override
|
||||
{
|
||||
auto request = _rpc_state.start_remote_call( "call", {api_id, std::move(method_name), std::move(args) } );
|
||||
_connection.send_message( fc::json::to_string(request) );
|
||||
return _rpc_state.wait_for_response( *request.id );
|
||||
}
|
||||
virtual variant send_callback( uint64_t callback_id, variants args = variants() ) override
|
||||
{
|
||||
auto request = _rpc_state.start_remote_call( "callback", {callback_id, std::move(args) } );
|
||||
_connection.send_message( fc::json::to_string(request) );
|
||||
return _rpc_state.wait_for_response( *request.id );
|
||||
}
|
||||
virtual void send_notice( uint64_t callback_id, variants args = variants() ) override
|
||||
{
|
||||
fc::rpc::request req{ optional<uint64_t>(), "notice", {callback_id, std::move(args)}};
|
||||
_connection.send_message( fc::json::to_string(req) );
|
||||
}
|
||||
websocket_api_connection( fc::http::websocket_connection& c );
|
||||
~websocket_api_connection();
|
||||
|
||||
virtual variant send_call(
|
||||
api_id_type api_id,
|
||||
string method_name,
|
||||
variants args = variants() ) override;
|
||||
virtual variant send_callback(
|
||||
uint64_t callback_id,
|
||||
variants args = variants() ) override;
|
||||
virtual void send_notice(
|
||||
uint64_t callback_id,
|
||||
variants args = variants() ) override;
|
||||
|
||||
protected:
|
||||
std::string on_message( const std::string& message, bool send_message = true )
|
||||
{
|
||||
try {
|
||||
auto var = fc::json::from_string(message);
|
||||
const auto& var_obj = var.get_object();
|
||||
if( var_obj.contains( "method" ) )
|
||||
{
|
||||
auto call = var.as<fc::rpc::request>();
|
||||
try {
|
||||
auto result = _rpc_state.local_call( call.method, call.params );
|
||||
if( call.id )
|
||||
{
|
||||
auto reply = fc::json::to_string( response( *call.id, result ) );
|
||||
if( send_message )
|
||||
_connection.send_message( reply );
|
||||
return reply;
|
||||
}
|
||||
}
|
||||
catch ( const fc::exception& e )
|
||||
{
|
||||
if( call.id )
|
||||
{
|
||||
auto reply = fc::json::to_string( response( *call.id, error_object{ 1, e.to_detail_string(), fc::variant(e)} ) );
|
||||
if( send_message )
|
||||
_connection.send_message( reply );
|
||||
std::string on_message(
|
||||
const std::string& message,
|
||||
bool send_message = true );
|
||||
|
||||
return reply;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto reply = var.as<fc::rpc::response>();
|
||||
_rpc_state.handle_reply( reply );
|
||||
}
|
||||
} catch ( const fc::exception& e ) {
|
||||
wdump((e.to_detail_string()));
|
||||
return e.to_detail_string();
|
||||
}
|
||||
return string();
|
||||
}
|
||||
fc::http::websocket_connection& _connection;
|
||||
fc::rpc::state _rpc_state;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -111,27 +111,27 @@ struct storage_ops<N, T, Ts...> {
|
|||
template<int N>
|
||||
struct storage_ops<N> {
|
||||
static void del(int n, void *data) {
|
||||
FC_ASSERT( !"Internal error: static_variant tag is invalid.");
|
||||
FC_THROW_EXCEPTION( fc::assert_exception, "Internal error: static_variant tag is invalid.");
|
||||
}
|
||||
static void con(int n, void *data) {
|
||||
FC_ASSERT( !"Internal error: static_variant tag is invalid." );
|
||||
FC_THROW_EXCEPTION( fc::assert_exception, "Internal error: static_variant tag is invalid." );
|
||||
}
|
||||
|
||||
template<typename visitor>
|
||||
static typename visitor::result_type apply(int n, void *data, visitor& v) {
|
||||
FC_ASSERT( !"Internal error: static_variant tag is invalid." );
|
||||
FC_THROW_EXCEPTION( fc::assert_exception, "Internal error: static_variant tag is invalid." );
|
||||
}
|
||||
template<typename visitor>
|
||||
static typename visitor::result_type apply(int n, void *data, const visitor& v) {
|
||||
FC_ASSERT( !"Internal error: static_variant tag is invalid." );
|
||||
FC_THROW_EXCEPTION( fc::assert_exception, "Internal error: static_variant tag is invalid." );
|
||||
}
|
||||
template<typename visitor>
|
||||
static typename visitor::result_type apply(int n, const void *data, visitor& v) {
|
||||
FC_ASSERT( !"Internal error: static_variant tag is invalid." );
|
||||
FC_THROW_EXCEPTION( fc::assert_exception, "Internal error: static_variant tag is invalid." );
|
||||
}
|
||||
template<typename visitor>
|
||||
static typename visitor::result_type apply(int n, const void *data, const visitor& v) {
|
||||
FC_ASSERT( !"Internal error: static_variant tag is invalid." );
|
||||
FC_THROW_EXCEPTION( fc::assert_exception, "Internal error: static_variant tag is invalid." );
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -286,8 +286,7 @@ public:
|
|||
if(_tag == impl::position<X, Types...>::pos) {
|
||||
return *reinterpret_cast<X*>(storage);
|
||||
} else {
|
||||
FC_ASSERT( !"static_variant does not contain a value of type",
|
||||
"type ${t}", ("t",fc::get_typename<X>::name()) );
|
||||
FC_THROW_EXCEPTION( fc::assert_exception, "static_variant does not contain a value of type ${t}", ("t",fc::get_typename<X>::name()) );
|
||||
// std::string("static_variant does not contain value of type ") + typeid(X).name()
|
||||
// );
|
||||
}
|
||||
|
|
@ -301,8 +300,7 @@ public:
|
|||
if(_tag == impl::position<X, Types...>::pos) {
|
||||
return *reinterpret_cast<const X*>(storage);
|
||||
} else {
|
||||
FC_ASSERT( !"static_variant does not contain a value of type",
|
||||
"type ${t}", ("t",fc::get_typename<X>::name()) );
|
||||
FC_THROW_EXCEPTION( fc::assert_exception, "static_variant does not contain a value of type ${t}", ("t",fc::get_typename<X>::name()) );
|
||||
}
|
||||
}
|
||||
template<typename visitor>
|
||||
|
|
|
|||
|
|
@ -1,201 +0,0 @@
|
|||
#include <boost/filesystem/path.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include <fc/compress/lzma.hpp>
#include <fc/exception/exception.hpp>
#include <fc/io/fstream.hpp>
#include <lzma_c.h>

#include <cstdlib>
#include <iostream>
|
||||
|
||||
namespace fc {
|
||||
|
||||
std::vector<char> lzma_compress(const std::vector<char>& in)
|
||||
{
|
||||
FC_ASSERT(!in.empty());
|
||||
|
||||
const unsigned char* in_data = reinterpret_cast<const unsigned char*> (&in[0]);;
|
||||
unsigned char* out_data;
|
||||
size_t out_len = 0;
|
||||
|
||||
int ret = simpleCompress(elzma_file_format::ELZMA_lzma, in_data, in.size(),
|
||||
&out_data, &out_len);
|
||||
|
||||
if(ret != 0)
|
||||
{
|
||||
FC_ASSERT(0);
|
||||
return std::vector<char>();
|
||||
}
|
||||
|
||||
std::vector<char> out(out_data, out_data+out_len);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<char> lzma_decompress( const std::vector<char>& compressed )
|
||||
{
|
||||
FC_ASSERT(!compressed.empty());
|
||||
|
||||
const unsigned char* in_data = reinterpret_cast<const unsigned char*> (&compressed[0]);;
|
||||
unsigned char* out_data;
|
||||
size_t out_len = 0;
|
||||
|
||||
|
||||
int ret = simpleDecompress(elzma_file_format::ELZMA_lzma, in_data, compressed.size(),
|
||||
&out_data, &out_len);
|
||||
|
||||
if(ret != 0)
|
||||
{
|
||||
FC_ASSERT(0);
|
||||
return std::vector<char>();
|
||||
}
|
||||
|
||||
std::vector<char> out(out_data, out_data+out_len);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
struct lzma_file_ctx
|
||||
{
|
||||
const unsigned char* src_buf;
|
||||
size_t src_len;
|
||||
|
||||
path dst_path;
|
||||
};
|
||||
|
||||
static int lzma_file_input_callback( void* input_ctx, void* input_buf, size_t* input_len )
|
||||
{
|
||||
FC_ASSERT( input_ctx != NULL );
|
||||
FC_ASSERT( input_buf != NULL );
|
||||
|
||||
const auto ctx = ( struct lzma_file_ctx* )input_ctx;
|
||||
const auto size = ( ctx->src_len < *input_len ) ? ctx->src_len : *input_len;
|
||||
|
||||
if( size > 0 )
|
||||
{
|
||||
memcpy( input_buf, ( void * )ctx->src_buf, size );
|
||||
ctx->src_buf += size;
|
||||
ctx->src_len -= size;
|
||||
}
|
||||
|
||||
*input_len = size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t lzma_file_output_callback( void* output_ctx, const void* output_buf, size_t output_len )
|
||||
{
|
||||
FC_ASSERT( output_ctx != NULL );
|
||||
FC_ASSERT( output_buf != NULL );
|
||||
|
||||
const auto ctx = ( struct lzma_file_ctx* )output_ctx;
|
||||
|
||||
if( output_len > 0 )
|
||||
{
|
||||
size_t dst_len = 0;
|
||||
if( !exists( ctx->dst_path ) )
|
||||
{
|
||||
ofstream fs( ctx->dst_path );
|
||||
fs.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_len = file_size( ctx->dst_path );
|
||||
}
|
||||
|
||||
resize_file( ctx->dst_path, dst_len + output_len );
|
||||
|
||||
boost::iostreams::mapped_file_sink dst_file;
|
||||
dst_file.open( (boost::filesystem::path)ctx->dst_path );
|
||||
FC_ASSERT( dst_file.is_open() );
|
||||
|
||||
memcpy( ( void* )(dst_file.data() + dst_len), output_buf, output_len);
|
||||
|
||||
dst_file.close();
|
||||
}
|
||||
|
||||
return output_len;
|
||||
}
|
||||
|
||||
void lzma_compress_file( const path& src_path,
|
||||
const path& dst_path,
|
||||
unsigned char level,
|
||||
unsigned int dict_size )
|
||||
{
|
||||
FC_ASSERT( exists( src_path ) );
|
||||
FC_ASSERT( !exists( dst_path ) );
|
||||
|
||||
boost::iostreams::mapped_file_source src_file;
|
||||
src_file.open( (boost::filesystem::path)src_path );
|
||||
FC_ASSERT( src_file.is_open() );
|
||||
|
||||
elzma_compress_handle handle = NULL;
|
||||
handle = elzma_compress_alloc();
|
||||
FC_ASSERT( handle != NULL );
|
||||
|
||||
struct lzma_file_ctx ctx;
|
||||
ctx.src_buf = ( const unsigned char* )src_file.data();
|
||||
ctx.src_len = src_file.size();
|
||||
ctx.dst_path = dst_path;
|
||||
|
||||
auto rc = elzma_compress_config( handle,
|
||||
ELZMA_LC_DEFAULT,
|
||||
ELZMA_LP_DEFAULT,
|
||||
ELZMA_PB_DEFAULT,
|
||||
level,
|
||||
dict_size,
|
||||
elzma_file_format::ELZMA_lzma,
|
||||
ctx.src_len );
|
||||
|
||||
try
|
||||
{
|
||||
FC_ASSERT( rc == ELZMA_E_OK );
|
||||
}
|
||||
catch( ... )
|
||||
{
|
||||
elzma_compress_free( &handle );
|
||||
throw;
|
||||
}
|
||||
|
||||
rc = elzma_compress_run( handle,
|
||||
lzma_file_input_callback,
|
||||
( void * )&ctx,
|
||||
lzma_file_output_callback,
|
||||
( void * )&ctx,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
elzma_compress_free( &handle );
|
||||
FC_ASSERT( rc == ELZMA_E_OK );
|
||||
}
|
||||
|
||||
void lzma_decompress_file( const path& src_path,
|
||||
const path& dst_path )
|
||||
{
|
||||
FC_ASSERT( exists( src_path ) );
|
||||
FC_ASSERT( !exists( dst_path ) );
|
||||
|
||||
boost::iostreams::mapped_file_source src_file;
|
||||
src_file.open( (boost::filesystem::path)src_path );
|
||||
FC_ASSERT( src_file.is_open() );
|
||||
|
||||
elzma_decompress_handle handle = NULL;
|
||||
handle = elzma_decompress_alloc();
|
||||
FC_ASSERT( handle != NULL );
|
||||
|
||||
struct lzma_file_ctx ctx;
|
||||
ctx.src_buf = ( const unsigned char* )src_file.data();
|
||||
ctx.src_len = src_file.size();
|
||||
ctx.dst_path = dst_path;
|
||||
|
||||
auto rc = elzma_decompress_run( handle,
|
||||
lzma_file_input_callback,
|
||||
( void * )&ctx,
|
||||
lzma_file_output_callback,
|
||||
( void * )&ctx,
|
||||
elzma_file_format::ELZMA_lzma );
|
||||
|
||||
elzma_decompress_free( &handle );
|
||||
FC_ASSERT( rc == ELZMA_E_OK );
|
||||
}
|
||||
|
||||
} // namespace fc
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
#include <fc/crypto/salsa20.hpp>
|
||||
extern "C" {
|
||||
#include <ecrypt-sync.h>
|
||||
}
|
||||
|
||||
namespace fc
|
||||
{
|
||||
static bool salsa20_init = []() -> bool { ECRYPT_init(); return true; }();
|
||||
|
||||
void salsa20_encrypt( const fc::sha256& key, uint64_t iv, const char* plain, char* cipher, uint64_t len )
|
||||
{
|
||||
ECRYPT_ctx ctx;
|
||||
ECRYPT_keysetup( &ctx, (unsigned char*)&key, ECRYPT_MAXIVSIZE, ECRYPT_MAXKEYSIZE );
|
||||
ECRYPT_ivsetup( &ctx, (unsigned char*)&iv );
|
||||
|
||||
ECRYPT_encrypt_bytes( &ctx, (const unsigned char*)plain, (unsigned char*)cipher, len );
|
||||
}
|
||||
void salsa20_decrypt( const fc::sha256& key, uint64_t iv, const char* cipher, char* plain, uint64_t len )
|
||||
{
|
||||
}
|
||||
}
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
#include <fc/crypto/scrypt.hpp>
|
||||
#include <fc/exception/exception.hpp>
|
||||
#include "scrypt-jane.h"
|
||||
|
||||
namespace fc {
|
||||
|
||||
unsigned log2( unsigned n )
|
||||
{
|
||||
if( n <= 0 ) FC_THROW_EXCEPTION( exception, "cannot take log2(${n})", ("n",n) );
|
||||
unsigned i = 0;
|
||||
while( n >>= 1 ) ++i;
|
||||
return i;
|
||||
}
|
||||
|
||||
void scrypt_derive_key( const std::vector<unsigned char>& passphrase, const std::vector<unsigned char>& salt,
|
||||
unsigned int n, unsigned int r, unsigned int p, std::vector<unsigned char>& key )
|
||||
{
|
||||
scrypt( passphrase.data(), passphrase.size(), salt.data(), salt.size(),
|
||||
log2( n ) - 1, log2( r ), log2( p ), key.data(), key.capacity() );
|
||||
}
|
||||
|
||||
} // namespace fc
|
||||
|
|
@ -390,7 +390,7 @@ namespace fc
|
|||
{
|
||||
skip_white_space(in);
|
||||
variant var;
|
||||
while( char c = in.peek() )
|
||||
while( signed char c = in.peek() )
|
||||
{
|
||||
switch( c )
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
#include <fc/compress/lzma.hpp>
|
||||
#include <fc/exception/exception.hpp>
|
||||
#include <fc/io/fstream.hpp>
|
||||
#include <fc/log/file_appender.hpp>
|
||||
|
|
@ -13,8 +12,6 @@
|
|||
|
||||
namespace fc {
|
||||
|
||||
static const string compression_extension( ".lzma" );
|
||||
|
||||
class file_appender::impl : public fc::retainable
|
||||
{
|
||||
public:
|
||||
|
|
@ -25,7 +22,6 @@ namespace fc {
|
|||
private:
|
||||
future<void> _rotation_task;
|
||||
time_point_sec _current_file_start_time;
|
||||
std::unique_ptr<thread> _compression_thread;
|
||||
|
||||
time_point_sec get_file_start_time( const time_point_sec& timestamp, const microseconds& interval )
|
||||
{
|
||||
|
|
@ -34,26 +30,6 @@ namespace fc {
|
|||
return time_point_sec( (uint32_t)(file_number * interval_seconds) );
|
||||
}
|
||||
|
||||
// Compresses a rotated log file next to itself (same name plus
// compression_extension) and removes the uncompressed original.
// The work always runs on _compression_thread: when called from any other
// thread the call is re-dispatched there and this thread waits for it.
// Failures are deliberately swallowed -- compression is best effort.
void compress_file( const fc::path& filename )
{
    FC_ASSERT( cfg.rotate && cfg.rotation_compression );
    FC_ASSERT( _compression_thread );

    if( _compression_thread->is_current() )
    {
        try
        {
            const auto compressed_name = filename.parent_path() / (filename.filename().string() + compression_extension);
            lzma_compress_file( filename, compressed_name );
            remove_all( filename );
        }
        catch( ... )
        {
            // best effort: leave the uncompressed file in place
        }
        return;
    }

    // Not on the compression thread: hand the job over and block until done.
    _compression_thread->async( [this, filename]() { compress_file( filename ); }, "compress_file" ).wait();
}
|
||||
|
||||
public:
|
||||
impl( const config& c) : cfg( c )
|
||||
{
|
||||
|
|
@ -62,9 +38,6 @@ namespace fc {
|
|||
FC_ASSERT( cfg.rotation_interval >= seconds( 1 ) );
|
||||
FC_ASSERT( cfg.rotation_limit >= cfg.rotation_interval );
|
||||
|
||||
if( cfg.rotation_compression )
|
||||
_compression_thread.reset( new thread( "compression") );
|
||||
|
||||
_rotation_task = async( [this]() { rotate_files( true ); }, "rotate_files(1)" );
|
||||
}
|
||||
}
|
||||
|
|
@ -132,17 +105,11 @@ namespace fc {
|
|||
remove_all( *itr );
|
||||
continue;
|
||||
}
|
||||
|
||||
if( !cfg.rotation_compression )
|
||||
continue;
|
||||
if( current_filename.find( compression_extension ) != string::npos )
|
||||
continue;
|
||||
compress_file( *itr );
|
||||
}
|
||||
}
|
||||
catch (const fc::canceled_exception&)
|
||||
{
|
||||
throw;
|
||||
throw;
|
||||
}
|
||||
catch( ... )
|
||||
{
|
||||
|
|
@ -160,8 +127,7 @@ namespace fc {
|
|||
format( "${timestamp} ${thread_name} ${context} ${file}:${line} ${method} ${level}] ${message}" ),
|
||||
filename(p),
|
||||
flush(true),
|
||||
rotate(false),
|
||||
rotation_compression(false)
|
||||
rotate(false)
|
||||
{}
|
||||
|
||||
file_appender::file_appender( const variant& args ) :
|
||||
|
|
|
|||
|
|
@ -30,6 +30,94 @@
|
|||
|
||||
namespace fc { namespace rpc {
|
||||
|
||||
// Stops the read loop on destruction, but only when _run_complete holds a
// task (it is assigned in start()).
cli::~cli()
{
   if( !_run_complete.valid() )
      return;

   stop();
}
|
||||
|
||||
// Outgoing calls are not supported by the CLI connection: this always fails
// the assertion and never returns a value.
// NOTE(review): presumably because an interactive console has no remote peer -- confirm.
variant cli::send_call( api_id_type api_id, string method_name, variants args /* = variants() */ )
|
||||
{
|
||||
FC_ASSERT(false);
|
||||
}
|
||||
|
||||
// Outgoing callbacks are not supported by the CLI connection: this always
// fails the assertion and never returns a value.
variant cli::send_callback( uint64_t callback_id, variants args /* = variants() */ )
|
||||
{
|
||||
FC_ASSERT(false);
|
||||
}
|
||||
|
||||
// Outgoing notices are not supported by the CLI connection: this always
// fails the assertion.
void cli::send_notice( uint64_t callback_id, variants args /* = variants() */ )
|
||||
{
|
||||
FC_ASSERT(false);
|
||||
}
|
||||
|
||||
void cli::start()
|
||||
{
|
||||
_run_complete = fc::async( [&](){ run(); } );
|
||||
}
|
||||
|
||||
// Requests cancellation of the task started by start() and then waits on it.
// The order matters: cancel first so that run()'s loop condition
// (_run_complete.canceled()) can end the loop before we block in wait().
void cli::stop()
|
||||
{
|
||||
_run_complete.cancel();
|
||||
_run_complete.wait();
|
||||
}
|
||||
|
||||
// Waits on the task started by start() without requesting cancellation.
void cli::wait()
|
||||
{
|
||||
_run_complete.wait();
|
||||
}
|
||||
|
||||
// Registers (or replaces) the formatter used by run() to print the result of
// `method`. run() invokes it with the call result and the full parsed
// argument list (method name included) and prints the returned string.
void cli::format_result( const string& method, std::function<string(variant,const variants&)> formatter)
|
||||
{
|
||||
_result_formatters[method] = formatter;
|
||||
}
|
||||
|
||||
// Sets the prompt text that run() passes to getline() for each input line.
void cli::set_prompt( const string& prompt )
|
||||
{
|
||||
_prompt = prompt;
|
||||
}
|
||||
|
||||
void cli::run()
|
||||
{
|
||||
while( !_run_complete.canceled() )
|
||||
{
|
||||
try
|
||||
{
|
||||
std::string line;
|
||||
try
|
||||
{
|
||||
getline( _prompt.c_str(), line );
|
||||
}
|
||||
catch ( const fc::eof_exception& e )
|
||||
{
|
||||
break;
|
||||
}
|
||||
std::cout << line << "\n";
|
||||
line += char(EOF);
|
||||
fc::variants args = fc::json::variants_from_string(line);;
|
||||
if( args.size() == 0 )
|
||||
continue;
|
||||
|
||||
const string& method = args[0].get_string();
|
||||
|
||||
auto result = receive_call( 0, method, variants( args.begin()+1,args.end() ) );
|
||||
auto itr = _result_formatters.find( method );
|
||||
if( itr == _result_formatters.end() )
|
||||
{
|
||||
std::cout << fc::json::to_pretty_string( result ) << "\n";
|
||||
}
|
||||
else
|
||||
std::cout << itr->second( result, args ) << "\n";
|
||||
}
|
||||
catch ( const fc::exception& e )
|
||||
{
|
||||
std::cout << e.to_detail_string() << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cli::getline( const fc::string& prompt, fc::string& line)
|
||||
{
|
||||
// getting file descriptor for C++ streams is near impossible
|
||||
|
|
@ -69,4 +157,4 @@ void cli::getline( const fc::string& prompt, fc::string& line)
|
|||
}
|
||||
}
|
||||
|
||||
} }
|
||||
} } // namespace fc::rpc
|
||||
|
|
|
|||
123
src/rpc/http_api.cpp
Normal file
123
src/rpc/http_api.cpp
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
|
||||
#include <fc/rpc/http_api.hpp>
|
||||
|
||||
namespace fc { namespace rpc {
|
||||
|
||||
// Nothing to release explicitly; the body is intentionally empty.
http_api_connection::~http_api_connection()
|
||||
{
|
||||
}
|
||||
|
||||
// Registers the JSON-RPC methods understood by this connection on _rpc_state.
http_api_connection::http_api_connection()
{
   // "call": [api_id, method_name, [args...]] -> result of receive_call()
   _rpc_state.add_method( "call", [this]( const variants& args ) -> variant
   {
      FC_ASSERT( args.size() == 3 && args[2].is_array() );
      return this->receive_call( args[0].as_uint64(), args[1].as_string(), args[2].get_array() );
   } );

   // "notice": [callback_id, [args...]] -> forwarded to receive_notice(), empty result
   _rpc_state.add_method( "notice", [this]( const variants& args ) -> variant
   {
      FC_ASSERT( args.size() == 2 && args[1].is_array() );
      this->receive_notice( args[0].as_uint64(), args[1].get_array() );
      return variant();
   } );

   // "callback": [callback_id, [args...]] -> forwarded to receive_callback(), empty result
   _rpc_state.add_method( "callback", [this]( const variants& args ) -> variant
   {
      FC_ASSERT( args.size() == 2 && args[1].is_array() );
      this->receive_callback( args[0].as_uint64(), args[1].get_array() );
      return variant();
   } );

   // Any other method name is treated as a call on API id 0.
   _rpc_state.on_unhandled( [this]( const std::string& method_name, const variants& args )
   {
      return this->receive_call( 0, method_name, args );
   } );
}
|
||||
|
||||
// Outgoing calls cannot be delivered over this connection: all arguments are
// ignored and an empty variant is returned.
variant http_api_connection::send_call(
|
||||
api_id_type api_id,
|
||||
string method_name,
|
||||
variants args /* = variants() */ )
|
||||
{
|
||||
// HTTP gives the server no way to initiate a request, so this is a no-op
|
||||
return variant();
|
||||
}
|
||||
|
||||
// Outgoing callbacks cannot be delivered over this connection: all arguments
// are ignored and an empty variant is returned.
variant http_api_connection::send_callback(
|
||||
uint64_t callback_id,
|
||||
variants args /* = variants() */ )
|
||||
{
|
||||
// HTTP gives the server no way to initiate a request, so this is a no-op
|
||||
return variant();
|
||||
}
|
||||
|
||||
// Outgoing notices cannot be delivered over this connection: all arguments
// are ignored.
void http_api_connection::send_notice(
|
||||
uint64_t callback_id,
|
||||
variants args /* = variants() */ )
|
||||
{
|
||||
// HTTP gives the server no way to initiate a request, so this is a no-op
|
||||
return;
|
||||
}
|
||||
|
||||
// Handles one HTTP request carrying a JSON-RPC message. Must be called by the
// outside HTTP server's on_request handler.
//
// req   incoming request; its body is parsed as a JSON object
// resp  response object that receives the status, headers and body
//
// Outcome:
//   * body has a "method" key and the call succeeds -> 200 with the JSON result
//   * the call throws fc::exception            -> 500 with a JSON error object
//   * body has no "method" key                 -> 400, empty body
//   * the body cannot be parsed / converted    -> 500, empty body (logged)
// Failures while writing the response are logged and otherwise ignored.
void http_api_connection::on_request( const fc::http::request& req, const fc::http::server::response& resp )
{
   std::string resp_body;
   // Every path below assigns a status; the initial value is only a safe default.
   http::reply::status_code resp_status = http::reply::InternalServerError;

   try
   {
      resp.add_header( "Content-Type", "application/json" );
      std::string req_body( req.body.begin(), req.body.end() );
      auto var = fc::json::from_string( req_body );
      const auto& var_obj = var.get_object();

      if( var_obj.contains( "method" ) )
      {
         auto call = var.as<fc::rpc::request>();

         // The request id is optional. Dereferencing an unset id is invalid,
         // but an HTTP request always needs a reply, so requests without an
         // id are answered with id 0.
         const uint64_t call_id = call.id ? *call.id : 0;

         try
         {
            auto result = _rpc_state.local_call( call.method, call.params );
            resp_body = fc::json::to_string( fc::rpc::response( call_id, result ) );
            resp_status = http::reply::OK;
         }
         catch ( const fc::exception& e )
         {
            resp_body = fc::json::to_string( fc::rpc::response( call_id, error_object{ 1, e.to_detail_string(), fc::variant(e)} ) );
            resp_status = http::reply::InternalServerError;
         }
      }
      else
      {
         resp_status = http::reply::BadRequest;
         resp_body = "";
      }
   }
   catch ( const fc::exception& e )
   {
      resp_status = http::reply::InternalServerError;
      resp_body = "";
      wdump((e.to_detail_string()));
   }

   try
   {
      resp.set_status( resp_status );
      resp.set_length( resp_body.length() );
      resp.write( resp_body.c_str(), resp_body.length() );
   }
   catch( const fc::exception& e )
   {
      wdump((e.to_detail_string()));
   }
   return;
}
|
||||
|
||||
} } // namespace fc::rpc
|
||||
121
src/rpc/websocket_api.cpp
Normal file
121
src/rpc/websocket_api.cpp
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
|
||||
#include <fc/rpc/websocket_api.hpp>
|
||||
|
||||
namespace fc { namespace rpc {
|
||||
|
||||
// Nothing to release explicitly; the body is intentionally empty.
websocket_api_connection::~websocket_api_connection()
|
||||
{
|
||||
}
|
||||
|
||||
// Binds this API connection to websocket connection `c`: registers the
// JSON-RPC methods on _rpc_state and hooks the connection's message, HTTP and
// close events.
websocket_api_connection::websocket_api_connection( fc::http::websocket_connection& c )
   : _connection(c)
{
   // "call": [api_id, method_name, [args...]] -> result of receive_call()
   _rpc_state.add_method( "call", [this]( const variants& args ) -> variant
   {
      FC_ASSERT( args.size() == 3 && args[2].is_array() );
      return this->receive_call( args[0].as_uint64(), args[1].as_string(), args[2].get_array() );
   } );

   // "notice": [callback_id, [args...]] -> forwarded to receive_notice(), empty result
   _rpc_state.add_method( "notice", [this]( const variants& args ) -> variant
   {
      FC_ASSERT( args.size() == 2 && args[1].is_array() );
      this->receive_notice( args[0].as_uint64(), args[1].get_array() );
      return variant();
   } );

   // "callback": [callback_id, [args...]] -> forwarded to receive_callback(), empty result
   _rpc_state.add_method( "callback", [this]( const variants& args ) -> variant
   {
      FC_ASSERT( args.size() == 2 && args[1].is_array() );
      this->receive_callback( args[0].as_uint64(), args[1].get_array() );
      return variant();
   } );

   // Any other method name is treated as a call on API id 0.
   _rpc_state.on_unhandled( [this]( const std::string& method_name, const variants& args )
   {
      return this->receive_call( 0, method_name, args );
   } );

   // Websocket frames: on_message() sends the reply itself.
   _connection.on_message_handler( [this]( const std::string& msg ) { on_message( msg, true ); } );
   // Plain HTTP requests: the reply is returned to the handler's caller instead of being sent.
   _connection.on_http_handler( [this]( const std::string& msg ) { return on_message( msg, false ); } );
   // Re-emit the connection's close event as this object's closed().
   _connection.closed.connect( [this]() { closed(); } );
}
|
||||
|
||||
// Sends a "call" request with parameters [api_id, method_name, args] to the
// peer and blocks until the matching response arrives.
// Returns whatever _rpc_state.wait_for_response() yields for the request id.
variant websocket_api_connection::send_call( api_id_type api_id, string method_name, variants args /* = variants() */ )
{
   auto request = _rpc_state.start_remote_call( "call", {api_id, std::move(method_name), std::move(args) } );

   const auto payload = fc::json::to_string( request );
   _connection.send_message( payload );

   return _rpc_state.wait_for_response( *request.id );
}
|
||||
|
||||
// Sends a "callback" request with parameters [callback_id, args] to the peer
// and blocks until the matching response arrives.
variant websocket_api_connection::send_callback( uint64_t callback_id, variants args /* = variants() */ )
{
   auto request = _rpc_state.start_remote_call( "callback", {callback_id, std::move(args) } );

   const auto payload = fc::json::to_string( request );
   _connection.send_message( payload );

   return _rpc_state.wait_for_response( *request.id );
}
|
||||
|
||||
// Sends a "notice" with parameters [callback_id, args] to the peer. The
// request carries no id, and no response is waited for.
void websocket_api_connection::send_notice( uint64_t callback_id, variants args /* = variants() */ )
{
   fc::rpc::request req{ optional<uint64_t>(), "notice", {callback_id, std::move(args)}};

   const auto payload = fc::json::to_string( req );
   _connection.send_message( payload );
}
|
||||
|
||||
std::string websocket_api_connection::on_message(
|
||||
const std::string& message,
|
||||
bool send_message /* = true */ )
|
||||
{
|
||||
try
|
||||
{
|
||||
auto var = fc::json::from_string(message);
|
||||
const auto& var_obj = var.get_object();
|
||||
if( var_obj.contains( "method" ) )
|
||||
{
|
||||
auto call = var.as<fc::rpc::request>();
|
||||
try
|
||||
{
|
||||
auto result = _rpc_state.local_call( call.method, call.params );
|
||||
if( call.id )
|
||||
{
|
||||
auto reply = fc::json::to_string( response( *call.id, result ) );
|
||||
if( send_message )
|
||||
_connection.send_message( reply );
|
||||
return reply;
|
||||
}
|
||||
}
|
||||
catch ( const fc::exception& e )
|
||||
{
|
||||
if( call.id )
|
||||
{
|
||||
auto reply = fc::json::to_string( response( *call.id, error_object{ 1, e.to_detail_string(), fc::variant(e)} ) );
|
||||
if( send_message )
|
||||
_connection.send_message( reply );
|
||||
|
||||
return reply;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto reply = var.as<fc::rpc::response>();
|
||||
_rpc_state.handle_reply( reply );
|
||||
}
|
||||
}
|
||||
catch ( const fc::exception& e )
|
||||
{
|
||||
wdump((e.to_detail_string()));
|
||||
return e.to_detail_string();
|
||||
}
|
||||
return string();
|
||||
}
|
||||
|
||||
} } // namespace fc::rpc
|
||||
|
|
@ -788,7 +788,7 @@ string format_string( const string& format, const variant_object& args )
|
|||
variants result;
|
||||
result.reserve( std::max(aa.size(),ba.size()) );
|
||||
auto num = std::max(aa.size(),ba.size());
|
||||
for( uint64_t i = 0; i < num; ++i )
|
||||
for( unsigned i = 0; i < num; ++i )
|
||||
{
|
||||
if( aa.size() > i && ba.size() > i )
|
||||
result[i] = aa[i] + ba[i];
|
||||
|
|
@ -815,7 +815,7 @@ string format_string( const string& format, const variant_object& args )
|
|||
variants result;
|
||||
result.reserve( std::max(aa.size(),ba.size()) );
|
||||
auto num = std::max(aa.size(),ba.size());
|
||||
for( uint64_t i = 0; i < num; --i )
|
||||
for( unsigned i = 0; i < num; --i )
|
||||
{
|
||||
if( aa.size() > i && ba.size() > i )
|
||||
result[i] = aa[i] - ba[i];
|
||||
|
|
@ -844,7 +844,7 @@ string format_string( const string& format, const variant_object& args )
|
|||
variants result;
|
||||
result.reserve( std::max(aa.size(),ba.size()) );
|
||||
auto num = std::max(aa.size(),ba.size());
|
||||
for( uint64_t i = 0; i < num; ++i )
|
||||
for( unsigned i = 0; i < num; ++i )
|
||||
{
|
||||
if( aa.size() > i && ba.size() > i )
|
||||
result[i] = aa[i] * ba[i];
|
||||
|
|
@ -869,7 +869,7 @@ string format_string( const string& format, const variant_object& args )
|
|||
variants result;
|
||||
result.reserve( std::max(aa.size(),ba.size()) );
|
||||
auto num = std::max(aa.size(),ba.size());
|
||||
for( uint64_t i = 0; i < num; ++i )
|
||||
for( unsigned i = 0; i < num; ++i )
|
||||
{
|
||||
if( aa.size() > i && ba.size() > i )
|
||||
result[i] = aa[i] / ba[i];
|
||||
|
|
|
|||
|
|
@ -1,24 +0,0 @@
|
|||
#include <fc/compress/lzma.hpp>
|
||||
#include <fc/filesystem.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
using namespace fc;
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
if( argc != 2 )
|
||||
{
|
||||
std::cout << "usage: " << argv[0] << " <filename>\n";
|
||||
exit( -1 );
|
||||
}
|
||||
|
||||
auto src = std::string( argv[1] );
|
||||
auto dst = src + ".compressed";
|
||||
lzma_compress_file( src, dst );
|
||||
|
||||
lzma_decompress_file( dst, src + ".decompressed" );
|
||||
|
||||
return 0;
|
||||
}
|
||||
55
vendor/easylzma/CMakeLists.txt
vendored
55
vendor/easylzma/CMakeLists.txt
vendored
|
|
@ -1,55 +0,0 @@
|
|||
# Written in 2009 by Lloyd Hilaiel
|
||||
#
|
||||
# License
|
||||
#
|
||||
# All the cruft you find here is public domain. You don't have to credit
|
||||
# anyone to use this code, but my personal request is that you mention
|
||||
# Igor Pavlov for his hard, high quality work.
|
||||
#
|
||||
|
||||
cmake_minimum_required(VERSION 2.6)

project(easylzma)

set(EASYLZMA_MAJOR 0)
set(EASYLZMA_MINOR 0)
set(EASYLZMA_MICRO 7)

set(EASYLZMA_DIST_NAME
    "easylzma-${EASYLZMA_MAJOR}.${EASYLZMA_MINOR}.${EASYLZMA_MICRO}")

# Fall back to an optimized build when no build type was selected.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release")
endif()

# The expansion is quoted so the test stays a two-operand string comparison
# even if the value is empty or contains separators.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  message("** for a debug build: cmake -DCMAKE_BUILD_TYPE=Debug ..")
endif()

set(CMAKE_C_FLAGS "-Wall")
if(WIN32)
  # The linker flags formerly collected in `linkFlags`
  # (/PDB:NONE /INCREMENTAL:NO /OPT:NOREF /OPT:NOICF) are no longer set here
  # because they polluted the Keyhotee linker settings. `linkFlags` is
  # therefore not defined in this file and expands to an empty string unless
  # an enclosing scope defines it.
  # NOTE(review): the FORCE cache writes below overwrite user-provided linker
  # flags for the whole build tree -- confirm this is still wanted.
  set(CMAKE_EXE_LINKER_FLAGS_DEBUG ""
      CACHE STRING "LZMA debug linker flags" FORCE)
  set(CMAKE_EXE_LINKER_FLAGS_RELEASE ""
      CACHE STRING "LZMA release linker flags" FORCE)
  set(CMAKE_SHARED_LINKER_FLAGS "${linkFlags}"
      CACHE STRING "LZMA shared linker flags" FORCE)
  set(CMAKE_MODULE_LINKER_FLAGS "${linkFlags}"
      CACHE STRING "LZMA module linker flags" FORCE)

  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4668 /wd4820 /wd4996 /wd4255 /wd4130 /wd4100 /wd4711")
  set(CMAKE_C_FLAGS_DEBUG "/D DEBUG /Od /Z7")
  set(CMAKE_C_FLAGS_RELEASE "/D NDEBUG /O2")
else()
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -ansi")
  set(CMAKE_C_FLAGS_DEBUG "-DDEBUG -g")
  set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG -O2")
endif()

# Only the library itself is built; the elzma/ and test/ subdirectories are
# intentionally not added.
add_subdirectory(src)
|
||||
59
vendor/easylzma/src/CMakeLists.txt
vendored
59
vendor/easylzma/src/CMakeLists.txt
vendored
|
|
@ -1,59 +0,0 @@
|
|||
# Written in 2009 by Lloyd Hilaiel
|
||||
#
|
||||
# License
|
||||
#
|
||||
# All the cruft you find here is public domain. You don't have to credit
|
||||
# anyone to use this code, but my personal request is that you mention
|
||||
# Igor Pavlov for his hard, high quality work.
|
||||
#
|
||||
|
||||
if(WIN32)
  add_definitions(-DWIN32)
endif()

file(GLOB SRCS pavlov/*.c *.c)
file(GLOB HDRS *.h pavlov/*.h easylzma/*.h)
file(GLOB PUB_HDRS easylzma/*.h)

# Lets the easylzma implementation find its public headers (easylzma/*.h).
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Only the static library is built. The shared-library target, its version
# properties, the custom output directories and the post-build copy of the
# public headers are all disabled in this build.
add_library(easylzma_static STATIC ${SRCS} ${HDRS})

# Export-related compile flags intended for a shared build; they are computed
# here but not attached to any target while the shared library is disabled.
set(sharedLibCompileFlags "-DEASYLZMA_SHARED -DEASYLZMA_BUILD")
if(APPLE)
  set(sharedLibCompileFlags "${sharedLibCompileFlags} -fno-common")
endif()
|
||||
3
vendor/easylzma/src/README
vendored
3
vendor/easylzma/src/README
vendored
|
|
@ -1,3 +0,0 @@
|
|||
pavlov/ - contains original lzma compress/decompress source from Igor Pavlov
|
||||
easylzma/ - contains the public api of this library
|
||||
./ - contains the implementation of this wrapper library
|
||||
43
vendor/easylzma/src/common_internal.c
vendored
43
vendor/easylzma/src/common_internal.c
vendored
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
* Written in 2009 by Lloyd Hilaiel
|
||||
*
|
||||
* License
|
||||
*
|
||||
* All the cruft you find here is public domain. You don't have to credit
|
||||
* anyone to use this code, but my personal request is that you mention
|
||||
* Igor Pavlov for his hard, high quality work.
|
||||
*/
|
||||
|
||||
#include "common_internal.h"
|
||||
|
||||
static void *elzmaAlloc(void *p, size_t size) {
|
||||
struct elzma_alloc_struct * as = (struct elzma_alloc_struct *) p;
|
||||
if (as->clientMallocFunc) {
|
||||
return as->clientMallocFunc(as->clientMallocContext, size);
|
||||
}
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
static void elzmaFree(void *p, void *address) {
|
||||
struct elzma_alloc_struct * as = (struct elzma_alloc_struct *) p;
|
||||
if (as->clientFreeFunc) {
|
||||
as->clientFreeFunc(as->clientMallocContext, address);
|
||||
} else {
|
||||
free(address);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init_alloc_struct(struct elzma_alloc_struct * as,
|
||||
elzma_malloc clientMallocFunc,
|
||||
void * clientMallocContext,
|
||||
elzma_free clientFreeFunc,
|
||||
void * clientFreeContext)
|
||||
{
|
||||
as->Alloc = elzmaAlloc;
|
||||
as->Free = elzmaFree;
|
||||
as->clientMallocFunc = clientMallocFunc;
|
||||
as->clientMallocContext = clientMallocContext;
|
||||
as->clientFreeFunc = clientFreeFunc;
|
||||
as->clientFreeContext = clientFreeContext;
|
||||
}
|
||||
63
vendor/easylzma/src/common_internal.h
vendored
63
vendor/easylzma/src/common_internal.h
vendored
|
|
@ -1,63 +0,0 @@
|
|||
#ifndef __ELZMA_COMMON_INTERNAL_H__
|
||||
#define __ELZMA_COMMON_INTERNAL_H__
|
||||
|
||||
#include "easylzma/common.h"
|
||||
|
||||
/** a structure which may be cast and passed into Igor's allocate
|
||||
* routines */
|
||||
struct elzma_alloc_struct {
|
||||
/* allocator entry points; they come first because compress.c passes a
 * pointer to this struct cast to ISzAlloc * */
void *(*Alloc)(void *p, size_t size);
|
||||
void (*Free)(void *p, void *address); /* address can be 0 */
|
||||
|
||||
/* optional client allocation callback and the context handed to it */
elzma_malloc clientMallocFunc;
|
||||
void * clientMallocContext;
|
||||
|
||||
/* optional client release callback and its own context */
elzma_free clientFreeFunc;
|
||||
void * clientFreeContext;
|
||||
};
|
||||
|
||||
/* initialize an allocation structure, may be called safely multiple
|
||||
* times */
|
||||
void init_alloc_struct(struct elzma_alloc_struct * allocStruct,
|
||||
elzma_malloc clientMallocFunc,
|
||||
void * clientMallocContext,
|
||||
elzma_free clientFreeFunc,
|
||||
void * clientFreeContext);
|
||||
|
||||
/** superset representation of a compressed file header */
|
||||
struct elzma_file_header {
|
||||
/* pb, lp and lc are copied from the encoder properties of the same name
 * when a header is written (see elzma_compress_run) */
unsigned char pb;
|
||||
unsigned char lp;
|
||||
unsigned char lc;
|
||||
/* non-zero when the uncompressed size is not known up front; the compressor
 * sets it when the configured uncompressed size is 0 */
unsigned char isStreamed;
|
||||
long long unsigned int uncompressedSize;
|
||||
/* dictionary size, copied from the encoder properties */
unsigned int dictSize;
|
||||
};
|
||||
|
||||
/** superset representation of a compressed file footer */
|
||||
struct elzma_file_footer {
|
||||
/* CRC32 over the uncompressed input; the compressor stores the running
 * CRC XORed with 0xFFFFFFFF */
unsigned int crc32;
|
||||
/* the compressor fills this from the size configured on the handle */
long long unsigned int uncompressedSize;
|
||||
};
|
||||
|
||||
/** a structure which encapsulates information about the particular
|
||||
* file header and footer in use (lzip vs lzma vs (eventually) xz.
|
||||
* The intention of this structure is to simplify compression and
|
||||
* decompression logic by abstracting the file format details a bit. */
|
||||
struct elzma_format_handler
|
||||
{
|
||||
/* number of bytes in a serialized header; the compressor allocates and
 * writes exactly this many */
unsigned int header_size;
|
||||
/* header callbacks: initialize, decode from bytes, encode to bytes */
void (*init_header)(struct elzma_file_header * hdr);
|
||||
int (*parse_header)(const unsigned char * hdrBuf,
|
||||
struct elzma_file_header * hdr);
|
||||
int (*serialize_header)(unsigned char * hdrBuf,
|
||||
const struct elzma_file_header * hdr);
|
||||
|
||||
/* number of bytes in a serialized footer; the compressor writes a footer
 * only when serialize_footer is non-NULL and footer_size > 0 */
unsigned int footer_size;
|
||||
int (*serialize_footer)(struct elzma_file_footer * ftr,
|
||||
unsigned char * ftrBuf);
|
||||
int (*parse_footer)(const unsigned char * ftrBuf,
|
||||
struct elzma_file_footer * ftr);
|
||||
};
|
||||
|
||||
#endif
|
||||
308
vendor/easylzma/src/compress.c
vendored
308
vendor/easylzma/src/compress.c
vendored
|
|
@ -1,308 +0,0 @@
|
|||
/*
|
||||
* Written in 2009 by Lloyd Hilaiel
|
||||
*
|
||||
* License
|
||||
*
|
||||
* All the cruft you find here is public domain. You don't have to credit
|
||||
* anyone to use this code, but my personal request is that you mention
|
||||
* Igor Pavlov for his hard, high quality work.
|
||||
*/
|
||||
|
||||
#include "easylzma/compress.h"
|
||||
#include "lzma_header.h"
|
||||
#include "lzip_header.h"
|
||||
#include "common_internal.h"
|
||||
|
||||
#include "pavlov/Types.h"
|
||||
#include "pavlov/LzmaEnc.h"
|
||||
#include "pavlov/7zCrc.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/* State behind an elzma_compress_handle. */
struct _elzma_compress_handle {
|
||||
/* encoder settings; defaults are set in elzma_compress_alloc and may be
 * overridden by elzma_compress_config */
CLzmaEncProps props;
|
||||
/* LZMA encoder object, created in elzma_compress_run */
CLzmaEncHandle encHand;
|
||||
/* size announced via elzma_compress_config; 0 marks the header as streamed */
unsigned long long uncompressedSize;
|
||||
elzma_file_format format;
|
||||
/* allocator passed (cast to ISzAlloc *) to the LZMA encoder */
struct elzma_alloc_struct allocStruct;
|
||||
/* header/footer callbacks for the selected container format */
struct elzma_format_handler formatHandler;
|
||||
};
|
||||
|
||||
elzma_compress_handle
|
||||
elzma_compress_alloc()
|
||||
{
|
||||
elzma_compress_handle hand = malloc(sizeof(struct _elzma_compress_handle));
|
||||
memset((void *) hand, 0, sizeof(struct _elzma_compress_handle));
|
||||
|
||||
/* "reasonable" defaults for props */
|
||||
LzmaEncProps_Init(&(hand->props));
|
||||
hand->props.lc = 3;
|
||||
hand->props.lp = 0;
|
||||
hand->props.pb = 2;
|
||||
hand->props.level = 9;
|
||||
hand->props.algo = 1;
|
||||
hand->props.fb = 32;
|
||||
hand->props.dictSize = 1 << 24;
|
||||
hand->props.btMode = 1;
|
||||
hand->props.numHashBytes = 4;
|
||||
hand->props.mc = 32;
|
||||
hand->props.numThreads = 1;
|
||||
hand->props.writeEndMark = 1;
|
||||
|
||||
init_alloc_struct(&(hand->allocStruct), NULL, NULL, NULL, NULL);
|
||||
|
||||
/* default format is LZMA-Alone */
|
||||
initializeLZMAFormatHandler(&(hand->formatHandler));
|
||||
|
||||
return hand;
|
||||
}
|
||||
|
||||
void
|
||||
elzma_compress_free(elzma_compress_handle * hand)
|
||||
{
|
||||
if (hand && *hand) {
|
||||
if ((*hand)->encHand) {
|
||||
LzmaEnc_Destroy((*hand)->encHand,
|
||||
(ISzAlloc *) &((*hand)->allocStruct),
|
||||
(ISzAlloc *) &((*hand)->allocStruct));
|
||||
}
|
||||
|
||||
}
|
||||
*hand = NULL;
|
||||
}
|
||||
|
||||
int
|
||||
elzma_compress_config(elzma_compress_handle hand,
|
||||
unsigned char lc,
|
||||
unsigned char lp,
|
||||
unsigned char pb,
|
||||
unsigned char level,
|
||||
unsigned int dictionarySize,
|
||||
elzma_file_format format,
|
||||
unsigned long long uncompressedSize)
|
||||
{
|
||||
/* XXX: validate arguments are in valid ranges */
|
||||
|
||||
hand->props.lc = lc;
|
||||
hand->props.lp = lp;
|
||||
hand->props.pb = pb;
|
||||
hand->props.level = level;
|
||||
hand->props.dictSize = dictionarySize;
|
||||
hand->uncompressedSize = uncompressedSize;
|
||||
hand->format = format;
|
||||
|
||||
/* default of LZMA-Alone is set at alloc time, and there are only
|
||||
* two possible formats */
|
||||
if (format == ELZMA_lzip) {
|
||||
initializeLZIPFormatHandler(&(hand->formatHandler));
|
||||
}
|
||||
|
||||
return ELZMA_E_OK;
|
||||
}
|
||||
|
||||
/* use Igor's stream hooks for compression. */
|
||||
struct elzmaInStream
|
||||
{
|
||||
/* read hook; first member because the struct is passed to the encoder
 * cast to ISeqInStream * */
SRes (*ReadPtr)(void *p, void *buf, size_t *size);
|
||||
/* client read callback and the context forwarded to it */
elzma_read_callback inputStream;
|
||||
void * inputContext;
|
||||
/* running CRC of the data read so far, updated by elzmaReadFunc */
unsigned int crc32;
|
||||
/* NOTE(review): crc32a/b/c are not referenced anywhere in the visible
 * code of this file -- confirm whether they can be removed */
unsigned int crc32a;
|
||||
unsigned int crc32b;
|
||||
unsigned int crc32c;
|
||||
/* non-zero when the CRC must be maintained (format has a footer) */
int calculateCRC;
|
||||
};
|
||||
|
||||
static SRes elzmaReadFunc(void *p, void *buf, size_t *size)
|
||||
{
|
||||
int rv;
|
||||
struct elzmaInStream * is = (struct elzmaInStream *) p;
|
||||
rv = is->inputStream(is->inputContext, buf, size);
|
||||
if (rv == 0 && *size > 0 && is->calculateCRC) {
|
||||
is->crc32 = CrcUpdate(is->crc32, buf, *size);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
struct elzmaOutStream {
|
||||
/* write hook; first member because the struct is passed to the encoder
 * cast to ISeqOutStream * */
size_t (*WritePtr)(void *p, const void *buf, size_t size);
|
||||
/* client write callback and the context forwarded to it */
elzma_write_callback outputStream;
|
||||
void * outputContext;
|
||||
};
|
||||
|
||||
static size_t elzmaWriteFunc(void *p, const void *buf, size_t size)
|
||||
{
|
||||
struct elzmaOutStream * os = (struct elzmaOutStream *) p;
|
||||
return os->outputStream(os->outputContext, buf, size);
|
||||
}
|
||||
|
||||
/* use Igor's stream hooks for compression. */
|
||||
struct elzmaProgressStruct
|
||||
{
|
||||
/* progress hook; first member because the struct is passed to the encoder
 * cast to ICompressProgress * */
SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
|
||||
/* total size reported to the client alongside the bytes consumed so far */
long long unsigned int uncompressedSize;
|
||||
/* optional client progress callback and the context forwarded to it */
elzma_progress_callback progressCallback;
|
||||
void * progressContext;
|
||||
|
||||
};
|
||||
|
||||
#include <stdio.h>
|
||||
/* Progress hook handed to the LZMA encoder. When the client registered a
 * progress callback it is told how many input bytes were consumed and the
 * configured total; outSize is not reported. Always returns SZ_OK. */
static SRes elzmaProgress(void *p, UInt64 inSize, UInt64 outSize)
{
    struct elzmaProgressStruct * progress = (struct elzmaProgressStruct *) p;

    if (!progress->progressCallback) {
        return SZ_OK;
    }

    progress->progressCallback(progress->progressContext, inSize,
                               progress->uncompressedSize);
    return SZ_OK;
}
|
||||
|
||||
void elzma_compress_set_allocation_callbacks(
|
||||
elzma_compress_handle hand,
|
||||
elzma_malloc mallocFunc, void * mallocFuncContext,
|
||||
elzma_free freeFunc, void * freeFuncContext)
|
||||
{
|
||||
if (hand) {
|
||||
init_alloc_struct(&(hand->allocStruct),
|
||||
mallocFunc, mallocFuncContext,
|
||||
freeFunc, freeFuncContext);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
elzma_compress_run(elzma_compress_handle hand,
|
||||
elzma_read_callback inputStream, void * inputContext,
|
||||
elzma_write_callback outputStream, void * outputContext,
|
||||
elzma_progress_callback progressCallback,
|
||||
void * progressContext)
|
||||
{
|
||||
struct elzmaInStream inStreamStruct;
|
||||
struct elzmaOutStream outStreamStruct;
|
||||
struct elzmaProgressStruct progressStruct;
|
||||
SRes r;
|
||||
|
||||
CrcGenerateTable();
|
||||
|
||||
if (hand == NULL || inputStream == NULL) return ELZMA_E_BAD_PARAMS;
|
||||
|
||||
/* initialize stream structrures */
|
||||
inStreamStruct.ReadPtr = elzmaReadFunc;
|
||||
inStreamStruct.inputStream = inputStream;
|
||||
inStreamStruct.inputContext = inputContext;
|
||||
inStreamStruct.crc32 = CRC_INIT_VAL;
|
||||
inStreamStruct.calculateCRC =
|
||||
(hand->formatHandler.serialize_footer != NULL);
|
||||
|
||||
outStreamStruct.WritePtr = elzmaWriteFunc;
|
||||
outStreamStruct.outputStream = outputStream;
|
||||
outStreamStruct.outputContext = outputContext;
|
||||
|
||||
progressStruct.Progress = elzmaProgress;
|
||||
progressStruct.uncompressedSize = hand->uncompressedSize;
|
||||
progressStruct.progressCallback = progressCallback;
|
||||
progressStruct.progressContext = progressContext;
|
||||
|
||||
/* create an encoding object */
|
||||
hand->encHand = LzmaEnc_Create((ISzAlloc *) &(hand->allocStruct));
|
||||
|
||||
if (hand->encHand == NULL) {
|
||||
return ELZMA_E_COMPRESS_ERROR;
|
||||
}
|
||||
|
||||
/* inintialize with compression parameters */
|
||||
if (SZ_OK != LzmaEnc_SetProps(hand->encHand, &(hand->props)))
|
||||
{
|
||||
return ELZMA_E_BAD_PARAMS;
|
||||
}
|
||||
|
||||
/* verify format is sane */
|
||||
if (ELZMA_lzma != hand->format && ELZMA_lzip != hand->format) {
|
||||
return ELZMA_E_UNSUPPORTED_FORMAT;
|
||||
}
|
||||
|
||||
/* now write the compression header header */
|
||||
{
|
||||
unsigned char * hdr =
|
||||
hand->allocStruct.Alloc(&(hand->allocStruct),
|
||||
hand->formatHandler.header_size);
|
||||
|
||||
struct elzma_file_header h;
|
||||
size_t wt;
|
||||
|
||||
hand->formatHandler.init_header(&h);
|
||||
h.pb = (unsigned char) hand->props.pb;
|
||||
h.lp = (unsigned char) hand->props.lp;
|
||||
h.lc = (unsigned char) hand->props.lc;
|
||||
h.dictSize = hand->props.dictSize;
|
||||
h.isStreamed = (unsigned char) (hand->uncompressedSize == 0);
|
||||
h.uncompressedSize = hand->uncompressedSize;
|
||||
|
||||
hand->formatHandler.serialize_header(hdr, &h);
|
||||
|
||||
wt = outputStream(outputContext, (void *) hdr,
|
||||
hand->formatHandler.header_size);
|
||||
|
||||
hand->allocStruct.Free(&(hand->allocStruct), hdr);
|
||||
|
||||
if (wt != hand->formatHandler.header_size) {
|
||||
return ELZMA_E_OUTPUT_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/* begin LZMA encoding */
|
||||
/* XXX: expose encoding progress */
|
||||
r = LzmaEnc_Encode(hand->encHand,
|
||||
(ISeqOutStream *) &outStreamStruct,
|
||||
(ISeqInStream *) &inStreamStruct,
|
||||
(ICompressProgress *) &progressStruct,
|
||||
(ISzAlloc *) &(hand->allocStruct),
|
||||
(ISzAlloc *) &(hand->allocStruct));
|
||||
|
||||
if (r != SZ_OK) return ELZMA_E_COMPRESS_ERROR;
|
||||
|
||||
/* support a footer! (lzip) */
|
||||
if (hand->formatHandler.serialize_footer != NULL &&
|
||||
hand->formatHandler.footer_size > 0)
|
||||
{
|
||||
size_t wt;
|
||||
unsigned char * ftrBuf =
|
||||
hand->allocStruct.Alloc(&(hand->allocStruct),
|
||||
hand->formatHandler.footer_size);
|
||||
struct elzma_file_footer ftr;
|
||||
ftr.crc32 = inStreamStruct.crc32 ^ 0xFFFFFFFF;
|
||||
ftr.uncompressedSize = hand->uncompressedSize;
|
||||
|
||||
hand->formatHandler.serialize_footer(&ftr, ftrBuf);
|
||||
|
||||
wt = outputStream(outputContext, (void *) ftrBuf,
|
||||
hand->formatHandler.footer_size);
|
||||
|
||||
hand->allocStruct.Free(&(hand->allocStruct), ftrBuf);
|
||||
|
||||
if (wt != hand->formatHandler.footer_size) {
|
||||
return ELZMA_E_OUTPUT_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
return ELZMA_E_OK;
|
||||
}
|
||||
|
||||
/**
 * Heuristically propose an LZMA dictionary size for `size` input bytes.
 *
 * Inputs larger than 8 MiB (1 << 23) are halved first.  The result is one
 * of the two powers of two surrounding that value, whichever is nearer,
 * and never exceeds 1 << 23.  The search starts at 13 bits, so the
 * smallest upper candidate is 1 << 13.
 */
unsigned int
elzma_get_dict_size(unsigned long long size)
{
    const int maxBits = 23;
    int bits = 13;
    unsigned long long above;
    unsigned long long below;

    if (size > (1ULL << maxBits)) {
        size /= 2;
    }

    /* smallest bit count, starting from 13, with (1 << bits) > size */
    while ((size >> bits) != 0) {
        bits++;
    }

    if (bits > maxBits) {
        return 1u << maxBits;
    }

    /* Distance to each neighbouring power of two.  The arithmetic is
     * unsigned: when size is below 1 << (bits - 1) the lower distance
     * wraps to a huge value and the upper candidate is chosen. */
    above = (1ULL << bits) - size;
    below = size - (1ULL << (bits - 1));

    if (above > below) {
        return 1u << (bits - 1);
    }
    return 1u << bits;
}
|
||||
254
vendor/easylzma/src/decompress.c
vendored
254
vendor/easylzma/src/decompress.c
vendored
|
|
@ -1,254 +0,0 @@
|
|||
/*
|
||||
* Written in 2009 by Lloyd Hilaiel
|
||||
*
|
||||
* License
|
||||
*
|
||||
* All the cruft you find here is public domain. You don't have to credit
|
||||
* anyone to use this code, but my personal request is that you mention
|
||||
* Igor Pavlov for his hard, high quality work.
|
||||
*/
|
||||
|
||||
#include "easylzma/decompress.h"
|
||||
#include "pavlov/LzmaDec.h"
|
||||
#include "pavlov/7zCrc.h"
|
||||
#include "common_internal.h"
|
||||
#include "lzma_header.h"
|
||||
#include "lzip_header.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define ELZMA_DECOMPRESS_INPUT_BUFSIZE (1024 * 64)
|
||||
#define ELZMA_DECOMPRESS_OUTPUT_BUFSIZE (1024 * 256)
|
||||
|
||||
/** an opaque handle to an lzma decompressor */
|
||||
struct _elzma_decompress_handle {
|
||||
char inbuf[ELZMA_DECOMPRESS_INPUT_BUFSIZE];
|
||||
char outbuf[ELZMA_DECOMPRESS_OUTPUT_BUFSIZE];
|
||||
struct elzma_alloc_struct allocStruct;
|
||||
};
|
||||
|
||||
elzma_decompress_handle
|
||||
elzma_decompress_alloc()
|
||||
{
|
||||
elzma_decompress_handle hand =
|
||||
malloc(sizeof(struct _elzma_decompress_handle));
|
||||
memset((void *) hand, 0, sizeof(struct _elzma_decompress_handle));
|
||||
init_alloc_struct(&(hand->allocStruct), NULL, NULL, NULL, NULL);
|
||||
return hand;
|
||||
}
|
||||
|
||||
void elzma_decompress_set_allocation_callbacks(
|
||||
elzma_decompress_handle hand,
|
||||
elzma_malloc mallocFunc, void * mallocFuncContext,
|
||||
elzma_free freeFunc, void * freeFuncContext)
|
||||
{
|
||||
if (hand) {
|
||||
init_alloc_struct(&(hand->allocStruct),
|
||||
mallocFunc, mallocFuncContext,
|
||||
freeFunc, freeFuncContext);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
elzma_decompress_free(elzma_decompress_handle * hand)
|
||||
{
|
||||
if (*hand) free(*hand);
|
||||
*hand = NULL;
|
||||
}
|
||||
|
||||
int
|
||||
elzma_decompress_run(elzma_decompress_handle hand,
|
||||
elzma_read_callback inputStream, void * inputContext,
|
||||
elzma_write_callback outputStream, void * outputContext,
|
||||
elzma_file_format format)
|
||||
{
|
||||
unsigned long long int totalRead = 0; /* total amount read from stream */
|
||||
unsigned int crc32 = CRC_INIT_VAL; /* running crc32 (lzip case) */
|
||||
CLzmaDec dec;
|
||||
unsigned int errorCode = ELZMA_E_OK;
|
||||
struct elzma_format_handler formatHandler;
|
||||
struct elzma_file_header h;
|
||||
struct elzma_file_footer f;
|
||||
|
||||
/* switch between supported formats */
|
||||
if (format == ELZMA_lzma) {
|
||||
initializeLZMAFormatHandler(&formatHandler);
|
||||
} else if (format == ELZMA_lzip) {
|
||||
CrcGenerateTable();
|
||||
initializeLZIPFormatHandler(&formatHandler);
|
||||
} else {
|
||||
return ELZMA_E_BAD_PARAMS;
|
||||
}
|
||||
|
||||
/* initialize footer */
|
||||
f.crc32 = 0;
|
||||
f.uncompressedSize = 0;
|
||||
|
||||
/* initialize decoder memory */
|
||||
memset((void *) &dec, 0, sizeof(dec));
|
||||
LzmaDec_Init(&dec);
|
||||
|
||||
/* decode the header. */
|
||||
{
|
||||
unsigned char * hdr =
|
||||
hand->allocStruct.Alloc(&(hand->allocStruct),
|
||||
formatHandler.header_size);
|
||||
|
||||
size_t sz = formatHandler.header_size;
|
||||
|
||||
formatHandler.init_header(&h);
|
||||
|
||||
if (inputStream(inputContext, hdr, &sz) != 0 ||
|
||||
sz != formatHandler.header_size)
|
||||
{
|
||||
hand->allocStruct.Free(&(hand->allocStruct), hdr);
|
||||
return ELZMA_E_INPUT_ERROR;
|
||||
}
|
||||
|
||||
if (0 != formatHandler.parse_header(hdr, &h)) {
|
||||
hand->allocStruct.Free(&(hand->allocStruct), hdr);
|
||||
return ELZMA_E_CORRUPT_HEADER;
|
||||
}
|
||||
|
||||
/* the LzmaDec_Allocate call requires 5 bytes which have
|
||||
* compression properties encoded in them. In the case of
|
||||
* lzip, the header format does not already contain what
|
||||
* LzmaDec_Allocate expects, so we must craft it, silly */
|
||||
{
|
||||
unsigned char propsBuf[13];
|
||||
const unsigned char * propsPtr = hdr;
|
||||
|
||||
if (format == ELZMA_lzip) {
|
||||
struct elzma_format_handler lzmaHand;
|
||||
initializeLZMAFormatHandler(&lzmaHand);
|
||||
lzmaHand.serialize_header(propsBuf, &h);
|
||||
propsPtr = propsBuf;
|
||||
}
|
||||
|
||||
/* now we're ready to allocate the decoder */
|
||||
LzmaDec_Allocate(&dec, propsPtr, 5,
|
||||
(ISzAlloc *) &(hand->allocStruct));
|
||||
}
|
||||
|
||||
hand->allocStruct.Free(&(hand->allocStruct), hdr);
|
||||
}
|
||||
|
||||
/* perform the decoding */
|
||||
for (;;)
|
||||
{
|
||||
size_t dstLen = ELZMA_DECOMPRESS_OUTPUT_BUFSIZE;
|
||||
size_t srcLen = ELZMA_DECOMPRESS_INPUT_BUFSIZE;
|
||||
size_t amt = 0;
|
||||
size_t bufOff = 0;
|
||||
ELzmaStatus stat;
|
||||
|
||||
if (0 != inputStream(inputContext, hand->inbuf, &srcLen))
|
||||
{
|
||||
errorCode = ELZMA_E_INPUT_ERROR;
|
||||
goto decompressEnd;
|
||||
}
|
||||
|
||||
/* handle the case where the input prematurely finishes */
|
||||
if (srcLen == 0) {
|
||||
errorCode = ELZMA_E_INSUFFICIENT_INPUT;
|
||||
goto decompressEnd;
|
||||
}
|
||||
|
||||
amt = srcLen;
|
||||
|
||||
/* handle the case where a single read buffer of compressed bytes
|
||||
* will translate into multiple buffers of uncompressed bytes,
|
||||
* with this inner loop */
|
||||
stat = LZMA_STATUS_NOT_SPECIFIED;
|
||||
|
||||
while (bufOff < srcLen) {
|
||||
SRes r = LzmaDec_DecodeToBuf(&dec, (Byte *) hand->outbuf, &dstLen,
|
||||
((Byte *) hand->inbuf + bufOff), &amt,
|
||||
LZMA_FINISH_ANY, &stat);
|
||||
|
||||
/* XXX deal with result code more granularly*/
|
||||
if (r != SZ_OK) {
|
||||
errorCode = ELZMA_E_DECOMPRESS_ERROR;
|
||||
goto decompressEnd;
|
||||
}
|
||||
|
||||
/* write what we've read */
|
||||
{
|
||||
size_t wt;
|
||||
|
||||
/* if decoding lzip, update our crc32 value */
|
||||
if (format == ELZMA_lzip && dstLen > 0) {
|
||||
crc32 = CrcUpdate(crc32, hand->outbuf, dstLen);
|
||||
|
||||
}
|
||||
totalRead += dstLen;
|
||||
|
||||
wt = outputStream(outputContext, hand->outbuf, dstLen);
|
||||
if (wt != dstLen) {
|
||||
errorCode = ELZMA_E_OUTPUT_ERROR;
|
||||
goto decompressEnd;
|
||||
}
|
||||
}
|
||||
|
||||
/* do we have more data on the input buffer? */
|
||||
bufOff += amt;
|
||||
assert( bufOff <= srcLen );
|
||||
if (bufOff >= srcLen) break;
|
||||
amt = srcLen - bufOff;
|
||||
|
||||
/* with lzip, we will have the footer left on the buffer! */
|
||||
if (stat == LZMA_STATUS_FINISHED_WITH_MARK) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* now check status */
|
||||
if (stat == LZMA_STATUS_FINISHED_WITH_MARK) {
|
||||
/* read a footer if one is expected and
|
||||
* present */
|
||||
if (formatHandler.footer_size > 0 &&
|
||||
amt >= formatHandler.footer_size &&
|
||||
formatHandler.parse_footer != NULL)
|
||||
{
|
||||
formatHandler.parse_footer(
|
||||
(unsigned char *) hand->inbuf + bufOff, &f);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
/* for LZMA utils, we don't always have a finished mark */
|
||||
if (!h.isStreamed && totalRead >= h.uncompressedSize) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* finish the calculated crc32 */
|
||||
crc32 ^= 0xFFFFFFFF;
|
||||
|
||||
/* if we have a footer, check that the calculated crc32 matches
|
||||
* the encoded crc32, and that the sizes match */
|
||||
if (formatHandler.footer_size)
|
||||
{
|
||||
if (f.crc32 != crc32) {
|
||||
errorCode = ELZMA_E_CRC32_MISMATCH;
|
||||
} else if (f.uncompressedSize != totalRead) {
|
||||
errorCode = ELZMA_E_SIZE_MISMATCH;
|
||||
}
|
||||
}
|
||||
else if (!h.isStreamed)
|
||||
{
|
||||
/* if the format does not support a footer and has an uncompressed
|
||||
* size in the header, let's compare that with how much we actually
|
||||
* read */
|
||||
if (h.uncompressedSize != totalRead) {
|
||||
errorCode = ELZMA_E_SIZE_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
decompressEnd:
|
||||
LzmaDec_Free(&dec, (ISzAlloc *) &(hand->allocStruct));
|
||||
|
||||
return errorCode;
|
||||
}
|
||||
124
vendor/easylzma/src/easylzma/common.h
vendored
124
vendor/easylzma/src/easylzma/common.h
vendored
|
|
@ -1,124 +0,0 @@
|
|||
/*
|
||||
* Written in 2009 by Lloyd Hilaiel
|
||||
*
|
||||
* License
|
||||
*
|
||||
* All the cruft you find here is public domain. You don't have to credit
|
||||
* anyone to use this code, but my personal request is that you mention
|
||||
* Igor Pavlov for his hard, high quality work.
|
||||
*
|
||||
* easylzma/common.h - definitions common to both compression and
|
||||
* decompression
|
||||
*/
|
||||
|
||||
#ifndef __EASYLZMACOMMON_H__
#define __EASYLZMACOMMON_H__

#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/* msft dll export gunk.  To build a DLL on windows, you
 * must define WIN32, EASYLZMA_SHARED, and EASYLZMA_BUILD.  To use a
 * DLL, you must define EASYLZMA_SHARED and WIN32 */
#if defined(WIN32) && defined(EASYLZMA_SHARED)
#  ifdef EASYLZMA_BUILD
#    define EASYLZMA_API __declspec(dllexport)
#  else
#    define EASYLZMA_API __declspec(dllimport)
#  endif
#else
#  define EASYLZMA_API
#endif

/** error codes */

/** no error */
#define ELZMA_E_OK                               0
/** bad parameters passed to an ELZMA function */
#define ELZMA_E_BAD_PARAMS                      10
/** could not initialize the encoder with the configured parameters. */
#define ELZMA_E_ENCODING_PROPERTIES_ERROR       11
/** an error occurred during compression (XXX: be more specific) */
#define ELZMA_E_COMPRESS_ERROR                  12
/** a currently unsupported lzma file format was specified */
#define ELZMA_E_UNSUPPORTED_FORMAT              13
/** an error occurred when reading input */
#define ELZMA_E_INPUT_ERROR                     14
/** an error occurred when writing output */
#define ELZMA_E_OUTPUT_ERROR                    15
/** LZMA header couldn't be parsed */
#define ELZMA_E_CORRUPT_HEADER                  16
/** an error occurred during decompression (XXX: be more specific) */
#define ELZMA_E_DECOMPRESS_ERROR                17
/** the input stream returned EOF before the decompression could complete */
#define ELZMA_E_INSUFFICIENT_INPUT              18
/** for formats which have an embedded crc, this error indicates that
 *  what came out was not what went in, i.e. data corruption */
#define ELZMA_E_CRC32_MISMATCH                  19
/** for formats which have an embedded uncompressed content length,
 *  this error indicates that the amount we read was not what we expected */
#define ELZMA_E_SIZE_MISMATCH                   20


/** Supported file formats */
typedef enum {
    ELZMA_lzip, /**< the lzip format which includes a magic number and
                 *   CRC check */
    ELZMA_lzma  /**< the LZMA-Alone format, originally designed by
                 *   Igor Pavlov and in widespread use due to lzmautils,
                 *   lacking both aforementioned features of lzip */
/* XXX: future, potentially   ,
    ELZMA_xz
*/
} elzma_file_format;

/**
 * A callback invoked during elzma_[de]compress_run when the [de]compression
 * process has generated [de]compressed output.
 *
 * the size parameter indicates how much data is in buf to be written.
 * it is required that the write callback consume all data, and a return
 * value not equal to the input size indicates an error.
 */
typedef size_t (*elzma_write_callback)(void *ctx, const void *buf,
                                       size_t size);

/**
 * A callback invoked during elzma_[de]compress_run when the [de]compression
 * process requires more [un]compressed input.
 *
 * the size parameter is an in/out argument.  on input it indicates
 * the buffer size.  on output it indicates the amount of data read into
 * buf.  when *size is zero on output it indicates EOF.
 *
 * \returns the read callback should return nonzero on failure.
 */
typedef int (*elzma_read_callback)(void *ctx, void *buf,
                                   size_t *size);

/**
 * A callback invoked during elzma_[de]compress_run to report progress
 * on the [de]compression.
 *
 * The callback returns nothing.  NOTE(review): `complete` and `total`
 * presumably carry the amount processed so far and the expected overall
 * amount -- confirm against the code that invokes the callback.
 */
typedef void (*elzma_progress_callback)(void *ctx, size_t complete,
                                        size_t total);


/** pointer to a malloc function, supporting client overriding memory
 *  allocation routines */
typedef void * (*elzma_malloc)(void *ctx, unsigned int sz);

/** pointer to a free function, supporting client overriding memory
 *  allocation routines */
typedef void (*elzma_free)(void *ctx, void * ptr);

#ifdef __cplusplus
}
#endif

#endif
|
||||
85
vendor/easylzma/src/easylzma/compress.h
vendored
85
vendor/easylzma/src/easylzma/compress.h
vendored
|
|
@ -1,85 +0,0 @@
|
|||
/*
|
||||
* Written in 2009 by Lloyd Hilaiel
|
||||
*
|
||||
* License
|
||||
*
|
||||
* All the cruft you find here is public domain. You don't have to credit
|
||||
* anyone to use this code, but my personal request is that you mention
|
||||
* Igor Pavlov for his hard, high quality work.
|
||||
*
|
||||
* easylzma/compress.h - the API for LZMA compression using easylzma
|
||||
*/
|
||||
|
||||
#ifndef __EASYLZMACOMPRESS_H__
#define __EASYLZMACOMPRESS_H__

#include "easylzma/common.h"
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/** suggested default values */
#define ELZMA_LC_DEFAULT 3
#define ELZMA_LP_DEFAULT 0
#define ELZMA_PB_DEFAULT 2
#define ELZMA_DICT_SIZE_DEFAULT_MAX (1 << 24)

/** an opaque handle to an lzma compressor */
typedef struct _elzma_compress_handle * elzma_compress_handle;

/**
 * Allocate a handle to an LZMA compressor object.
 */
elzma_compress_handle EASYLZMA_API elzma_compress_alloc();

/**
 * set allocation routines (optional, if not called malloc & free will
 * be used)
 */
void EASYLZMA_API elzma_compress_set_allocation_callbacks(
    elzma_compress_handle hand,
    elzma_malloc mallocFunc, void * mallocFuncContext,
    elzma_free freeFunc, void * freeFuncContext);

/**
 * Free all data associated with an LZMA compressor object.
 */
void EASYLZMA_API elzma_compress_free(elzma_compress_handle * hand);

/**
 * Set configuration parameters for a compression run.  If not called,
 * reasonable defaults will be used.
 */
int EASYLZMA_API elzma_compress_config(elzma_compress_handle hand,
                                       unsigned char lc,
                                       unsigned char lp,
                                       unsigned char pb,
                                       unsigned char level,
                                       unsigned int dictionarySize,
                                       elzma_file_format format,
                                       unsigned long long uncompressedSize);

/**
 * Run compression
 */
int EASYLZMA_API elzma_compress_run(
    elzma_compress_handle hand,
    elzma_read_callback inputStream, void * inputContext,
    elzma_write_callback outputStream, void * outputContext,
    elzma_progress_callback progressCallback, void * progressContext);


/**
 * a heuristic utility routine to guess a dictionary size that gets near
 * optimal compression while reducing memory usage.
 * accepts a size in bytes, returns a proposed dictionary size
 */
unsigned int EASYLZMA_API elzma_get_dict_size(unsigned long long size);

#ifdef __cplusplus
}
#endif

#endif
|
||||
62
vendor/easylzma/src/easylzma/decompress.h
vendored
62
vendor/easylzma/src/easylzma/decompress.h
vendored
|
|
@ -1,62 +0,0 @@
|
|||
/*
|
||||
* Written in 2009 by Lloyd Hilaiel
|
||||
*
|
||||
* License
|
||||
*
|
||||
* All the cruft you find here is public domain. You don't have to credit
|
||||
* anyone to use this code, but my personal request is that you mention
|
||||
* Igor Pavlov for his hard, high quality work.
|
||||
*
|
||||
* easylzma/decompress.h - The API for LZMA decompression using easylzma
|
||||
*/
|
||||
|
||||
#ifndef __EASYLZMADECOMPRESS_H__
#define __EASYLZMADECOMPRESS_H__

#include "easylzma/common.h"

#ifdef __cplusplus
extern "C" {
#endif

/** an opaque handle to an lzma decompressor */
typedef struct _elzma_decompress_handle * elzma_decompress_handle;

/**
 * Allocate a handle to an LZMA decompressor object.
 */
elzma_decompress_handle EASYLZMA_API elzma_decompress_alloc();

/**
 * set allocation routines (optional, if not called malloc & free will
 * be used)
 */
void EASYLZMA_API elzma_decompress_set_allocation_callbacks(
    elzma_decompress_handle hand,
    elzma_malloc mallocFunc, void * mallocFuncContext,
    elzma_free freeFunc, void * freeFuncContext);

/**
 * Free all data associated with an LZMA decompressor object.
 */
void EASYLZMA_API elzma_decompress_free(elzma_decompress_handle * hand);

/**
 * Perform decompression
 *
 * XXX: should the library automatically detect format by reading stream?
 *      currently it's based on data external to stream (such as extension
 *      or convention)
 */
int EASYLZMA_API elzma_decompress_run(
    elzma_decompress_handle hand,
    elzma_read_callback inputStream, void * inputContext,
    elzma_write_callback outputStream, void * outputContext,
    elzma_file_format format);


#ifdef __cplusplus
}
#endif

#endif
|
||||
103
vendor/easylzma/src/lzip_header.c
vendored
103
vendor/easylzma/src/lzip_header.c
vendored
|
|
@ -1,103 +0,0 @@
|
|||
#include "lzip_header.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define ELZMA_LZIP_HEADER_SIZE 6
|
||||
#define ELZMA_LZIP_FOOTER_SIZE 12
|
||||
|
||||
static
|
||||
void initLzipHeader(struct elzma_file_header * hdr)
|
||||
{
|
||||
memset((void *) hdr, 0, sizeof(struct elzma_file_header));
|
||||
}
|
||||
|
||||
static
|
||||
int parseLzipHeader(const unsigned char * hdrBuf,
|
||||
struct elzma_file_header * hdr)
|
||||
{
|
||||
if (0 != strncmp("LZIP", (char *) hdrBuf, 4)) return 1;
|
||||
/* XXX: ignore version for now */
|
||||
hdr->pb = 2;
|
||||
hdr->lp = 0;
|
||||
hdr->lc = 3;
|
||||
/* unknown at this point */
|
||||
hdr->isStreamed = 1;
|
||||
hdr->uncompressedSize = 0;
|
||||
hdr->dictSize = 1 << (hdrBuf[5] & 0x1F);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
serializeLzipHeader(unsigned char * hdrBuf,
|
||||
const struct elzma_file_header * hdr)
|
||||
{
|
||||
hdrBuf[0] = 'L';
|
||||
hdrBuf[1] = 'Z';
|
||||
hdrBuf[2] = 'I';
|
||||
hdrBuf[3] = 'P';
|
||||
hdrBuf[4] = 0;
|
||||
{
|
||||
int r = 0;
|
||||
while ((hdr->dictSize >> r) != 0) r++;
|
||||
hdrBuf[5] = (unsigned char) (r-1) & 0x1F;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
serializeLzipFooter(struct elzma_file_footer * ftr,
|
||||
unsigned char * ftrBuf)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
|
||||
/* first crc32 */
|
||||
for (i = 0; i < 4; i++) {
|
||||
*(ftrBuf++) = (unsigned char) (ftr->crc32 >> (i * 8));
|
||||
}
|
||||
|
||||
/* next data size */
|
||||
for (i = 0; i < 8; i++) {
|
||||
*(ftrBuf++) = (unsigned char) (ftr->uncompressedSize >> (i * 8));
|
||||
}
|
||||
|
||||
/* write version 0 files, omit member length for now*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
parseLzipFooter(const unsigned char * ftrBuf,
|
||||
struct elzma_file_footer * ftr)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
ftr->crc32 = 0;
|
||||
ftr->uncompressedSize = 0;
|
||||
|
||||
/* first crc32 */
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
ftr->crc32 += ((unsigned int) *(ftrBuf++) << (i * 8));
|
||||
}
|
||||
|
||||
/* next data size */
|
||||
for (i = 0; i < 8; i++) {
|
||||
ftr->uncompressedSize +=
|
||||
(unsigned long long) *(ftrBuf++) << (i * 8);
|
||||
}
|
||||
/* read version 0 files, omit member length for now*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
initializeLZIPFormatHandler(struct elzma_format_handler * hand)
|
||||
{
|
||||
hand->header_size = ELZMA_LZIP_HEADER_SIZE;
|
||||
hand->init_header = initLzipHeader;
|
||||
hand->parse_header = parseLzipHeader;
|
||||
hand->serialize_header = serializeLzipHeader;
|
||||
hand->footer_size = ELZMA_LZIP_FOOTER_SIZE;
|
||||
hand->serialize_footer = serializeLzipFooter;
|
||||
hand->parse_footer = parseLzipFooter;
|
||||
}
|
||||
|
||||
11
vendor/easylzma/src/lzip_header.h
vendored
11
vendor/easylzma/src/lzip_header.h
vendored
|
|
@ -1,11 +0,0 @@
|
|||
#ifndef __EASYLZMA_LZIP_HEADER__
#define __EASYLZMA_LZIP_HEADER__

#include "common_internal.h"

/* The lzip file format is documented at
 *   http://download.savannah.gnu.org/releases-noredirect/lzip/manual/ */

/* Populate `hand` with the lzip header and footer handlers. */
void initializeLZIPFormatHandler(struct elzma_format_handler * hand);

#endif
|
||||
130
vendor/easylzma/src/lzma_c.c
vendored
130
vendor/easylzma/src/lzma_c.c
vendored
|
|
@ -1,130 +0,0 @@
|
|||
|
||||
#include "lzma_c.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* In-memory source and sink shared by the two stream callbacks below. */
struct dataStream
{
    /* input bytes not yet handed out, and how many remain */
    const unsigned char * inData;
    size_t inLen;

    /* heap buffer of output collected so far, and its length */
    unsigned char * outData;
    size_t outLen;
};
|
||||
|
||||
static int
|
||||
inputCallback(void *ctx, void *buf, size_t * size)
|
||||
{
|
||||
size_t rd = 0;
|
||||
struct dataStream * ds = (struct dataStream *) ctx;
|
||||
assert(ds != NULL);
|
||||
|
||||
rd = (ds->inLen < *size) ? ds->inLen : *size;
|
||||
|
||||
if (rd > 0) {
|
||||
memcpy(buf, (void *) ds->inData, rd);
|
||||
ds->inData += rd;
|
||||
ds->inLen -= rd;
|
||||
}
|
||||
|
||||
*size = rd;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
outputCallback(void *ctx, const void *buf, size_t size)
|
||||
{
|
||||
struct dataStream * ds = (struct dataStream *) ctx;
|
||||
assert(ds != NULL);
|
||||
|
||||
if (size > 0) {
|
||||
ds->outData = realloc(ds->outData, ds->outLen + size);
|
||||
memcpy((void *) (ds->outData + ds->outLen), buf, size);
|
||||
ds->outLen += size;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
int
|
||||
simpleCompress(elzma_file_format format, const unsigned char * inData,
|
||||
size_t inLen, unsigned char ** outData,
|
||||
size_t * outLen)
|
||||
{
|
||||
int rc = 0;
|
||||
elzma_compress_handle hand;
|
||||
|
||||
/* allocate compression handle */
|
||||
hand = elzma_compress_alloc();
|
||||
assert(hand != NULL);
|
||||
|
||||
rc = elzma_compress_config(hand, ELZMA_LC_DEFAULT,
|
||||
ELZMA_LP_DEFAULT, ELZMA_PB_DEFAULT,
|
||||
5, (1 << 20) /* 1mb */,
|
||||
format, inLen);
|
||||
|
||||
if (rc != ELZMA_E_OK) {
|
||||
elzma_compress_free(&hand);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* now run the compression */
|
||||
{
|
||||
struct dataStream ds;
|
||||
ds.inData = inData;
|
||||
ds.inLen = inLen;
|
||||
ds.outData = NULL;
|
||||
ds.outLen = 0;
|
||||
|
||||
rc = elzma_compress_run(hand, inputCallback, (void *) &ds,
|
||||
outputCallback, (void *) &ds,
|
||||
NULL, NULL);
|
||||
|
||||
if (rc != ELZMA_E_OK) {
|
||||
if (ds.outData != NULL) free(ds.outData);
|
||||
elzma_compress_free(&hand);
|
||||
return rc;
|
||||
}
|
||||
|
||||
*outData = ds.outData;
|
||||
*outLen = ds.outLen;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int
|
||||
simpleDecompress(elzma_file_format format, const unsigned char * inData,
|
||||
size_t inLen, unsigned char ** outData,
|
||||
size_t * outLen)
|
||||
{
|
||||
int rc = 0;
|
||||
elzma_decompress_handle hand;
|
||||
|
||||
hand = elzma_decompress_alloc();
|
||||
|
||||
/* now run the compression */
|
||||
{
|
||||
struct dataStream ds;
|
||||
ds.inData = inData;
|
||||
ds.inLen = inLen;
|
||||
ds.outData = NULL;
|
||||
ds.outLen = 0;
|
||||
|
||||
rc = elzma_decompress_run(hand, inputCallback, (void *) &ds,
|
||||
outputCallback, (void *) &ds, format);
|
||||
|
||||
if (rc != ELZMA_E_OK) {
|
||||
if (ds.outData != NULL) free(ds.outData);
|
||||
elzma_decompress_free(&hand);
|
||||
return rc;
|
||||
}
|
||||
|
||||
*outData = ds.outData;
|
||||
*outLen = ds.outLen;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
34
vendor/easylzma/src/lzma_c.h
vendored
34
vendor/easylzma/src/lzma_c.h
vendored
|
|
@ -1,34 +0,0 @@
|
|||
#ifndef __LZMA_C_H__
#define __LZMA_C_H__

#include <stdlib.h>

#include "easylzma/compress.h"
#include "easylzma/decompress.h"

#ifdef __cplusplus
extern "C" {
#endif

/* compress a chunk of memory and return a dynamically allocated buffer
 * if successful.  return value is an easylzma error code */
int simpleCompress(elzma_file_format format,
                   const unsigned char * inData,
                   size_t inLen,
                   unsigned char ** outData,
                   size_t * outLen);

/* decompress a chunk of memory and return a dynamically allocated buffer
 * if successful.  return value is an easylzma error code */
int simpleDecompress(elzma_file_format format,
                     const unsigned char * inData,
                     size_t inLen,
                     unsigned char ** outData,
                     size_t * outLen);

#ifdef __cplusplus
}
#endif

#endif
|
||||
|
||||
141
vendor/easylzma/src/lzma_header.c
vendored
141
vendor/easylzma/src/lzma_header.c
vendored
|
|
@ -1,141 +0,0 @@
|
|||
/*
|
||||
* Written in 2009 by Lloyd Hilaiel
|
||||
*
|
||||
* License
|
||||
*
|
||||
* All the cruft you find here is public domain. You don't have to credit
|
||||
* anyone to use this code, but my personal request is that you mention
|
||||
* Igor Pavlov for his hard, high quality work.
|
||||
*/
|
||||
|
||||
/* XXX: clean this up, it's mostly lifted from pavel */
|
||||
|
||||
#include "lzma_header.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define ELZMA_LZMA_HEADER_SIZE 13
|
||||
#define ELZMA_LZMA_PROPSBUF_SIZE 5
|
||||
|
||||
/****************
|
||||
Header parsing
|
||||
****************/
|
||||
|
||||
#ifndef UINT64_MAX
|
||||
#define UINT64_MAX ((unsigned long long) -1)
|
||||
#endif
|
||||
|
||||
/**
 * Parse the properties byte.
 *
 * pb, lp, lc   receive the three values decoded from `c`
 * c            the properties byte of an LZMA-Alone header
 *
 * Returns 0 on success and a nonzero value when `c` is out of range.
 */
static char
lzmadec_header_properties (
    unsigned char *pb, unsigned char *lp, unsigned char *lc, const unsigned char c)
{
    /* pb, lp and lc are encoded into a single byte as
     * (pb * 5 + lp) * 9 + lc, so valid bytes are 0 .. 9 * 5 * 5 - 1.
     * The value 9 * 5 * 5 itself would decode to pb == 5. */
    if (c >= (9 * 5 * 5))
        return -1;
    *pb = c / (9 * 5);        /* 0 <= pb <= 4 */
    *lp = (c % (9 * 5)) / 9;  /* 0 <= lp <= 4 */
    *lc = c % 9;              /* 0 <= lc <= 8 */

    assert (*pb < 5 && *lp < 5 && *lc < 9);
    return 0;
}
|
||||
|
||||
/**
 * Parse the dictionary size (4 bytes, little endian).
 *
 * size     receives the decoded value, even when it is rejected
 * buffer   the four bytes to decode
 *
 * Returns 0 on success and a nonzero value when the size exceeds 256 MiB
 * (limit checked from LZMA SDK 4.30).
 */
static char
lzmadec_header_dictionary (unsigned int *size, const unsigned char *buffer)
{
    unsigned int value = 0;
    unsigned int idx;

    /* each byte occupies its own 8-bit lane of the result */
    for (idx = 0; idx < 4; idx++) {
        value |= (unsigned int) buffer[idx] << (idx * 8);
    }
    *size = value;

    return (value > (1 << 28)) ? -1 : 0;
}
|
||||
|
||||
/**
 * Parse the uncompressed size field (8 bytes, little endian).
 *
 * size          receives the decoded 64-bit value
 * is_streamed   set to 1 when all eight bytes are 0xFF (the uncompressed
 *               size is not known beforehand), otherwise 0
 * buffer        the eight bytes to decode
 */
static void
lzmadec_header_uncompressed (unsigned long long *size,
                             unsigned char *is_streamed,
                             const unsigned char *buffer)
{
    unsigned long long total = 0;
    unsigned char allOnes = 1;  /* assume streamed until a byte disagrees */
    unsigned int k;

    for (k = 0; k < 8; k++) {
        total |= (unsigned long long) buffer[k] << (k * 8);
        if (buffer[k] != 255) {
            allOnes = 0;
        }
    }

    *size = total;
    *is_streamed = allOnes;

    assert ((*is_streamed == 1 && *size == UINT64_MAX)
            || (*is_streamed == 0 && *size < UINT64_MAX));
}
|
||||
|
||||
static void
|
||||
initLzmaHeader(struct elzma_file_header * hdr)
|
||||
{
|
||||
memset((void *) hdr, 0, sizeof(struct elzma_file_header));
|
||||
}
|
||||
|
||||
static int
|
||||
parseLzmaHeader(const unsigned char * hdrBuf,
|
||||
struct elzma_file_header * hdr)
|
||||
{
|
||||
if (lzmadec_header_properties(&(hdr->pb), &(hdr->lp), &(hdr->lc),
|
||||
*hdrBuf) ||
|
||||
lzmadec_header_dictionary(&(hdr->dictSize), hdrBuf + 1))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
lzmadec_header_uncompressed(&(hdr->uncompressedSize),
|
||||
&(hdr->isStreamed),
|
||||
hdrBuf + 5);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
serializeLzmaHeader(unsigned char * hdrBuf,
|
||||
const struct elzma_file_header * hdr)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
memset((void *) hdrBuf, 0, ELZMA_LZMA_HEADER_SIZE);
|
||||
|
||||
/* encode lc, pb, and lp */
|
||||
*hdrBuf++ = hdr->lc + (hdr->pb * 45) + (hdr->lp * 45 * 9);
|
||||
|
||||
/* encode dictionary size */
|
||||
for (i = 0; i < 4; i++) {
|
||||
*(hdrBuf++) = (unsigned char) (hdr->dictSize >> (i * 8));
|
||||
}
|
||||
|
||||
/* encode uncompressed size */
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (hdr->isStreamed) {
|
||||
*(hdrBuf++) = 0xff;
|
||||
} else {
|
||||
*(hdrBuf++) = (unsigned char) (hdr->uncompressedSize >> (i * 8));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
initializeLZMAFormatHandler(struct elzma_format_handler * hand)
|
||||
{
|
||||
hand->header_size = ELZMA_LZMA_HEADER_SIZE;
|
||||
hand->init_header = initLzmaHeader;
|
||||
hand->parse_header = parseLzmaHeader;
|
||||
hand->serialize_header = serializeLzmaHeader;
|
||||
hand->footer_size = 0;
|
||||
hand->serialize_footer = NULL;
|
||||
}
|
||||
10
vendor/easylzma/src/lzma_header.h
vendored
10
vendor/easylzma/src/lzma_header.h
vendored
|
|
@ -1,10 +0,0 @@
|
|||
#ifndef __EASYLZMA_LZMA_HEADER__
#define __EASYLZMA_LZMA_HEADER__

#include "common_internal.h"

/* The LZMA-Alone header layout was gleaned from reading Igor's code. */

/* Populate `hand` with the LZMA-Alone header handlers. */
void initializeLZMAFormatHandler(struct elzma_format_handler * hand);

#endif
|
||||
36
vendor/easylzma/src/pavlov/7zBuf.c
vendored
36
vendor/easylzma/src/pavlov/7zBuf.c
vendored
|
|
@ -1,36 +0,0 @@
|
|||
/* 7zBuf.c -- Byte Buffer
|
||||
2008-03-28
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
|
||||
#include "7zBuf.h"
|
||||
|
||||
/* Put a buffer into the empty state: no storage, zero size. */
void Buf_Init(CBuf *p)
{
  p->size = 0;
  p->data = 0;
}
|
||||
|
||||
/*
 * Allocate `size` bytes for `p` through `alloc`.
 * A zero `size` leaves the buffer empty and counts as success.
 * Returns 1 on success, 0 when the allocator returns a null pointer
 * (in which case the buffer is left empty).
 */
int Buf_Create(CBuf *p, size_t size, ISzAlloc *alloc)
{
  Byte *block = 0;

  if (size != 0)
    block = (Byte *)alloc->Alloc(alloc, size);

  p->data = block;
  p->size = (block != 0) ? size : 0;

  return (size == 0 || block != 0) ? 1 : 0;
}
|
||||
|
||||
/* Release the storage of `p` through `alloc` and reset it to the empty state. */
void Buf_Free(CBuf *p, ISzAlloc *alloc)
{
  alloc->Free(alloc, p->data);
  Buf_Init(p);
}
|
||||
31
vendor/easylzma/src/pavlov/7zBuf.h
vendored
31
vendor/easylzma/src/pavlov/7zBuf.h
vendored
|
|
@ -1,31 +0,0 @@
|
|||
/* 7zBuf.h -- Byte Buffer
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_BUF_H
|
||||
#define __7Z_BUF_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Byte *data;
|
||||
size_t size;
|
||||
} CBuf;
|
||||
|
||||
void Buf_Init(CBuf *p);
|
||||
int Buf_Create(CBuf *p, size_t size, ISzAlloc *alloc);
|
||||
void Buf_Free(CBuf *p, ISzAlloc *alloc);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Byte *data;
|
||||
size_t size;
|
||||
size_t pos;
|
||||
} CDynBuf;
|
||||
|
||||
void DynBuf_Construct(CDynBuf *p);
|
||||
void DynBuf_SeekToBeg(CDynBuf *p);
|
||||
int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAlloc *alloc);
|
||||
void DynBuf_Free(CDynBuf *p, ISzAlloc *alloc);
|
||||
|
||||
#endif
|
||||
45
vendor/easylzma/src/pavlov/7zBuf2.c
vendored
45
vendor/easylzma/src/pavlov/7zBuf2.c
vendored
|
|
@ -1,45 +0,0 @@
|
|||
/* 7zBuf2.c -- Byte Buffer
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#include <string.h>
|
||||
#include "7zBuf.h"
|
||||
|
||||
/* Put `p` into the empty state: no storage, zero capacity, write position 0. */
void DynBuf_Construct(CDynBuf *p)
{
  p->pos = 0;
  p->size = 0;
  p->data = 0;
}

/* Rewind the write position to the start; storage is kept. */
void DynBuf_SeekToBeg(CDynBuf *p)
{
  p->pos = 0;
}
|
||||
|
||||
/*
 * Append `size` bytes from `buf` at the current write position of `p`,
 * growing the storage through `alloc` when the remaining capacity is too
 * small. Growth requests the needed size plus 25% slack.
 *
 * Returns 1 on success and 0 when the allocation fails or the required
 * size does not fit in size_t; on failure `p` is left unchanged.
 */
int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAlloc *alloc)
{
  /* Nothing to append; also avoids memcpy on a null buffer pointer. */
  if (size == 0)
    return 1;

  if (size > p->size - p->pos)
  {
    size_t newSize;
    Byte *data;

    /* pos + size must not wrap around */
    if (size > (size_t)-1 - p->pos)
      return 0;
    newSize = p->pos + size;

    /* add the 25% slack only when it cannot overflow */
    if (newSize / 4 <= (size_t)-1 - newSize)
      newSize += newSize / 4;

    data = (Byte *)alloc->Alloc(alloc, newSize);
    if (data == 0)
      return 0;
    p->size = newSize;
    /* p->data may be null while pos is 0, so skip the empty copy */
    if (p->pos != 0)
      memcpy(data, p->data, p->pos);
    alloc->Free(alloc, p->data);
    p->data = data;
  }

  memcpy(p->data + p->pos, buf, size);
  p->pos += size;
  return 1;
}
|
||||
|
||||
/* Release the storage of `p` through `alloc` and reset it to the empty state. */
void DynBuf_Free(CDynBuf *p, ISzAlloc *alloc)
{
  alloc->Free(alloc, p->data);
  DynBuf_Construct(p);
}
|
||||
35
vendor/easylzma/src/pavlov/7zCrc.c
vendored
35
vendor/easylzma/src/pavlov/7zCrc.c
vendored
|
|
@ -1,35 +0,0 @@
|
|||
/* 7zCrc.c -- CRC32 calculation
|
||||
2008-08-05
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
|
||||
#include "7zCrc.h"
|
||||
|
||||
#define kCrcPoly 0xEDB88320
|
||||
UInt32 g_CrcTable[256];
|
||||
|
||||
/*
 * Fill g_CrcTable with the 256 byte-wise CRC32 remainders for the
 * reflected polynomial kCrcPoly. Must run before CrcUpdate / CrcCalc
 * are used, since they index this table.
 */
void MY_FAST_CALL CrcGenerateTable(void)
{
  UInt32 index = 0;
  while (index < 256)
  {
    UInt32 crc = index;
    int bit = 8;
    /* shift out one bit at a time, folding in the polynomial on a set bit */
    while (bit-- > 0)
      crc = (crc & 1) ? ((crc >> 1) ^ kCrcPoly) : (crc >> 1);
    g_CrcTable[index++] = crc;
  }
}
|
||||
|
||||
UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size)
|
||||
{
|
||||
const Byte *p = (const Byte *)data;
|
||||
for (; size > 0 ; size--, p++)
|
||||
v = CRC_UPDATE_BYTE(v, *p);
|
||||
return v;
|
||||
}
|
||||
|
||||
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
|
||||
{
|
||||
return CrcUpdate(CRC_INIT_VAL, data, size) ^ 0xFFFFFFFF;
|
||||
}
|
||||
24
vendor/easylzma/src/pavlov/7zCrc.h
vendored
24
vendor/easylzma/src/pavlov/7zCrc.h
vendored
|
|
@ -1,24 +0,0 @@
|
|||
/* 7zCrc.h -- CRC32 calculation
|
||||
2008-03-13
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
|
||||
#ifndef __7Z_CRC_H
|
||||
#define __7Z_CRC_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
extern UInt32 g_CrcTable[];
|
||||
|
||||
void MY_FAST_CALL CrcGenerateTable(void);
|
||||
|
||||
#define CRC_INIT_VAL 0xFFFFFFFF
|
||||
#define CRC_GET_DIGEST(crc) ((crc) ^ 0xFFFFFFFF)
|
||||
#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
|
||||
|
||||
UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size);
|
||||
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size);
|
||||
|
||||
#endif
|
||||
263
vendor/easylzma/src/pavlov/7zFile.c
vendored
263
vendor/easylzma/src/pavlov/7zFile.c
vendored
|
|
@ -1,263 +0,0 @@
|
|||
/* 7zFile.c -- File IO
|
||||
2008-11-22 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "7zFile.h"
|
||||
|
||||
#ifndef USE_WINDOWS_FILE
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef USE_WINDOWS_FILE
|
||||
|
||||
/*
|
||||
ReadFile and WriteFile functions in Windows have BUG:
|
||||
If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1)
|
||||
from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES
|
||||
(Insufficient system resources exist to complete the requested service).
|
||||
Probably in some version of Windows there are problems with other sizes:
|
||||
for 32 MB (maybe also for 16 MB).
|
||||
And message can be "Network connection was lost"
|
||||
*/
|
||||
|
||||
#define kChunkSizeMax (1 << 22)
|
||||
|
||||
#endif
|
||||
|
||||
/* Mark `p` as "not open" using the sentinel of the active file backend. */
void File_Construct(CSzFile *p)
{
#ifndef USE_WINDOWS_FILE
  p->file = NULL;
#else
  p->handle = INVALID_HANDLE_VALUE;
#endif
}
|
||||
|
||||
/*
 * Open `name` into `p`. A non-zero `writeMode` creates/truncates the file
 * for writing; zero opens an existing file for reading.
 * Returns 0 on success, otherwise GetLastError() (Windows) or errno (stdio).
 */
static WRes File_Open(CSzFile *p, const char *name, int writeMode)
{
#ifdef USE_WINDOWS_FILE
  DWORD access = writeMode ? GENERIC_WRITE : GENERIC_READ;
  DWORD disposition = writeMode ? CREATE_ALWAYS : OPEN_EXISTING;

  p->handle = CreateFileA(name, access, FILE_SHARE_READ, NULL,
      disposition, FILE_ATTRIBUTE_NORMAL, NULL);
  if (p->handle == INVALID_HANDLE_VALUE)
    return GetLastError();
  return 0;
#else
  const char *mode = writeMode ? "wb+" : "rb";

  p->file = fopen(name, mode);
  if (p->file == 0)
    return errno;
  return 0;
#endif
}
|
||||
|
||||
/* Open an existing file for reading. */
WRes InFile_Open(CSzFile *p, const char *name)
{
  return File_Open(p, name, 0);
}

/* Create (or truncate) a file for writing. */
WRes OutFile_Open(CSzFile *p, const char *name)
{
  return File_Open(p, name, 1);
}
|
||||
|
||||
/*
 * Close the file held by `p`, if any. Closing an already-closed CSzFile
 * is a no-op.
 * Returns 0 on success, otherwise GetLastError() (Windows) or the non-zero
 * fclose() result (stdio).
 */
WRes File_Close(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
  if (p->handle != INVALID_HANDLE_VALUE)
  {
    if (!CloseHandle(p->handle))
      return GetLastError();
    p->handle = INVALID_HANDLE_VALUE;
  }
#else
  if (p->file != NULL)
  {
    int res = fclose(p->file);
    /* The stream is gone after fclose() even when it reports an error,
       so drop the pointer first; keeping it would allow a second close
       or further I/O on an invalid FILE. */
    p->file = NULL;
    if (res != 0)
      return res;
  }
#endif
  return 0;
}
|
||||
|
||||
/*
 * Read up to *size bytes from `p` into `data`.
 * On return *size holds the number of bytes actually read, which may be
 * smaller than requested (for example at end of file).
 * Returns 0 on success, otherwise GetLastError() (Windows) or the
 * ferror() state of the stream after a short read (stdio).
 */
WRes File_Read(CSzFile *p, void *data, size_t *size)
{
  size_t remaining = *size;
  if (remaining == 0)
    return 0;

#ifdef USE_WINDOWS_FILE

  /* Issue the request in chunks of at most kChunkSizeMax bytes. */
  *size = 0;
  while (remaining > 0)
  {
    DWORD got = 0;
    DWORD want = (remaining > kChunkSizeMax) ? kChunkSizeMax : (DWORD)remaining;
    BOOL ok = ReadFile(p->handle, data, want, &got, NULL);

    data = (void *)((Byte *)data + got);
    remaining -= got;
    *size += got;

    if (!ok)
      return GetLastError();
    if (got == 0)
      break;
  }
  return 0;

#else

  *size = fread(data, 1, remaining, p->file);
  return (*size == remaining) ? 0 : ferror(p->file);

#endif
}
|
||||
|
||||
/*
 * Write *size bytes from `data` to `p`.
 * On return *size holds the number of bytes actually written.
 * Returns 0 on success, otherwise GetLastError() (Windows) or the
 * ferror() state of the stream after a short write (stdio).
 */
WRes File_Write(CSzFile *p, const void *data, size_t *size)
{
  size_t remaining = *size;
  if (remaining == 0)
    return 0;

#ifdef USE_WINDOWS_FILE

  /* Issue the request in chunks of at most kChunkSizeMax bytes. */
  *size = 0;
  while (remaining > 0)
  {
    DWORD done = 0;
    DWORD want = (remaining > kChunkSizeMax) ? kChunkSizeMax : (DWORD)remaining;
    BOOL ok = WriteFile(p->handle, data, want, &done, NULL);

    data = (void *)((Byte *)data + done);
    remaining -= done;
    *size += done;

    if (!ok)
      return GetLastError();
    if (done == 0)
      break;
  }
  return 0;

#else

  *size = fwrite(data, 1, remaining, p->file);
  return (*size == remaining) ? 0 : ferror(p->file);

#endif
}
|
||||
|
||||
/*
 * Move the file position of `p` by/to *pos relative to `origin` and store
 * the position reported afterwards back into *pos.
 * Returns 0 on success; an unknown `origin` is rejected with
 * ERROR_INVALID_PARAMETER (Windows) or 1 (stdio).
 */
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
{
#ifdef USE_WINDOWS_FILE

  LARGE_INTEGER value;
  DWORD moveMethod;

  if (origin == SZ_SEEK_SET)
    moveMethod = FILE_BEGIN;
  else if (origin == SZ_SEEK_CUR)
    moveMethod = FILE_CURRENT;
  else if (origin == SZ_SEEK_END)
    moveMethod = FILE_END;
  else
    return ERROR_INVALID_PARAMETER;

  value.LowPart = (DWORD)*pos;
  value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */

  value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod);
  /* 0xFFFFFFFF can be a legitimate low half; only an accompanying error code means failure */
  if (value.LowPart == 0xFFFFFFFF)
  {
    WRes res = GetLastError();
    if (res != NO_ERROR)
      return res;
  }
  *pos = ((Int64)value.HighPart << 32) | value.LowPart;
  return 0;

#else

  int whence;
  int rc;

  if (origin == SZ_SEEK_SET)
    whence = SEEK_SET;
  else if (origin == SZ_SEEK_CUR)
    whence = SEEK_CUR;
  else if (origin == SZ_SEEK_END)
    whence = SEEK_END;
  else
    return 1;

  /* the offset is narrowed to long for fseek */
  rc = fseek(p->file, (long)*pos, whence);
  *pos = ftell(p->file);
  return rc;

#endif
}
|
||||
|
||||
/*
 * Store the length of the file held by `p` in *length.
 * Returns 0 on success and a non-zero code on failure; *length is only
 * written when the size could be determined.
 */
WRes File_GetLength(CSzFile *p, UInt64 *length)
{
#ifdef USE_WINDOWS_FILE

  DWORD sizeHigh;
  DWORD sizeLow = GetFileSize(p->handle, &sizeHigh);
  /* 0xFFFFFFFF can be a legitimate low half; only an accompanying error code means failure */
  if (sizeLow == 0xFFFFFFFF)
  {
    DWORD res = GetLastError();
    if (res != NO_ERROR)
      return res;
  }
  *length = (((UInt64)sizeHigh) << 32) + sizeLow;
  return 0;

#else

  long saved;
  long end;
  int res;

  /* remember the current position so it can be restored afterwards */
  saved = ftell(p->file);
  if (saved < 0)
    return -1;

  res = fseek(p->file, 0, SEEK_END);
  if (res != 0)
    return res;

  end = ftell(p->file);
  /* restore the position even when the size query failed */
  res = fseek(p->file, saved, SEEK_SET);
  if (end < 0)
    return -1; /* do not report (UInt64)-1 as a length */

  *length = (UInt64)end;
  return res;

#endif
}
|
||||
|
||||
|
||||
/* ---------- FileSeqInStream ---------- */
|
||||
|
||||
static SRes FileSeqInStream_Read(void *pp, void *buf, size_t *size)
|
||||
{
|
||||
CFileSeqInStream *p = (CFileSeqInStream *)pp;
|
||||
return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ;
|
||||
}
|
||||
|
||||
void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
|
||||
{
|
||||
p->s.Read = FileSeqInStream_Read;
|
||||
}
|
||||
|
||||
|
||||
/* ---------- FileInStream ---------- */
|
||||
|
||||
static SRes FileInStream_Read(void *pp, void *buf, size_t *size)
|
||||
{
|
||||
CFileInStream *p = (CFileInStream *)pp;
|
||||
return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ;
|
||||
}
|
||||
|
||||
static SRes FileInStream_Seek(void *pp, Int64 *pos, ESzSeek origin)
|
||||
{
|
||||
CFileInStream *p = (CFileInStream *)pp;
|
||||
return File_Seek(&p->file, pos, origin);
|
||||
}
|
||||
|
||||
void FileInStream_CreateVTable(CFileInStream *p)
|
||||
{
|
||||
p->s.Read = FileInStream_Read;
|
||||
p->s.Seek = FileInStream_Seek;
|
||||
}
|
||||
|
||||
|
||||
/* ---------- FileOutStream ---------- */
|
||||
|
||||
static size_t FileOutStream_Write(void *pp, const void *data, size_t size)
|
||||
{
|
||||
CFileOutStream *p = (CFileOutStream *)pp;
|
||||
File_Write(&p->file, data, &size);
|
||||
return size;
|
||||
}
|
||||
|
||||
void FileOutStream_CreateVTable(CFileOutStream *p)
|
||||
{
|
||||
p->s.Write = FileOutStream_Write;
|
||||
}
|
||||
74
vendor/easylzma/src/pavlov/7zFile.h
vendored
74
vendor/easylzma/src/pavlov/7zFile.h
vendored
|
|
@ -1,74 +0,0 @@
|
|||
/* 7zFile.h -- File IO
|
||||
2008-11-22 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_FILE_H
|
||||
#define __7Z_FILE_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#define USE_WINDOWS_FILE
|
||||
#endif
|
||||
|
||||
#ifdef USE_WINDOWS_FILE
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
|
||||
/* ---------- File ---------- */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
#ifdef USE_WINDOWS_FILE
|
||||
HANDLE handle;
|
||||
#else
|
||||
FILE *file;
|
||||
#endif
|
||||
} CSzFile;
|
||||
|
||||
void File_Construct(CSzFile *p);
|
||||
WRes InFile_Open(CSzFile *p, const char *name);
|
||||
WRes OutFile_Open(CSzFile *p, const char *name);
|
||||
WRes File_Close(CSzFile *p);
|
||||
|
||||
/* reads min(*size, remaining file size) bytes */
|
||||
WRes File_Read(CSzFile *p, void *data, size_t *size);
|
||||
|
||||
/* writes *size bytes */
|
||||
WRes File_Write(CSzFile *p, const void *data, size_t *size);
|
||||
|
||||
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin);
|
||||
WRes File_GetLength(CSzFile *p, UInt64 *length);
|
||||
|
||||
|
||||
/* ---------- FileInStream ---------- */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqInStream s;
|
||||
CSzFile file;
|
||||
} CFileSeqInStream;
|
||||
|
||||
void FileSeqInStream_CreateVTable(CFileSeqInStream *p);
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeekInStream s;
|
||||
CSzFile file;
|
||||
} CFileInStream;
|
||||
|
||||
void FileInStream_CreateVTable(CFileInStream *p);
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqOutStream s;
|
||||
CSzFile file;
|
||||
} CFileOutStream;
|
||||
|
||||
void FileOutStream_CreateVTable(CFileOutStream *p);
|
||||
|
||||
#endif
|
||||
169
vendor/easylzma/src/pavlov/7zStream.c
vendored
169
vendor/easylzma/src/pavlov/7zStream.c
vendored
|
|
@ -1,169 +0,0 @@
|
|||
/* 7zStream.c -- 7z Stream functions
|
||||
2008-11-23 : Igor Pavlov : Public domain */
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType)
|
||||
{
|
||||
while (size != 0)
|
||||
{
|
||||
size_t processed = size;
|
||||
RINOK(stream->Read(stream, buf, &processed));
|
||||
if (processed == 0)
|
||||
return errorType;
|
||||
buf = (void *)((Byte *)buf + processed);
|
||||
size -= processed;
|
||||
}
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size)
|
||||
{
|
||||
return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
|
||||
}
|
||||
|
||||
SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf)
|
||||
{
|
||||
size_t processed = 1;
|
||||
RINOK(stream->Read(stream, buf, &processed));
|
||||
return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF;
|
||||
}
|
||||
|
||||
/* Seek `stream` to the absolute position `offset`. */
SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset)
{
  Int64 target = (Int64)offset;
  return stream->Seek(stream, &target, SZ_SEEK_SET);
}

/*
 * Read through the look-ahead interface: Look() exposes up to *size bytes,
 * they are copied into `buf`, and the same amount is then skipped.
 * *size is updated to the number of bytes delivered.
 */
SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size)
{
  void *window;
  if (*size != 0)
  {
    RINOK(stream->Look(stream, &window, size));
    memcpy(buf, window, *size);
    return stream->Skip(stream, *size);
  }
  return SZ_OK;
}
|
||||
|
||||
/*
 * Read exactly `size` bytes from `stream` into `buf`, looping over short
 * reads. A read error is returned as-is; a read that yields zero bytes
 * before the request is satisfied returns `errorType`.
 */
SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType)
{
  Byte *dest = (Byte *)buf;
  size_t remaining;
  for (remaining = size; remaining != 0;)
  {
    size_t got = remaining;
    RINOK(stream->Read(stream, dest, &got));
    if (got == 0)
      return errorType;
    dest += got;
    remaining -= got;
  }
  return SZ_OK;
}

/* Exact-size read that reports a premature end as SZ_ERROR_INPUT_EOF. */
SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size)
{
  return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
}
|
||||
|
||||
static SRes LookToRead_Look_Lookahead(void *pp, void **buf, size_t *size)
|
||||
{
|
||||
SRes res = SZ_OK;
|
||||
CLookToRead *p = (CLookToRead *)pp;
|
||||
size_t size2 = p->size - p->pos;
|
||||
if (size2 == 0 && *size > 0)
|
||||
{
|
||||
p->pos = 0;
|
||||
size2 = LookToRead_BUF_SIZE;
|
||||
res = p->realStream->Read(p->realStream, p->buf, &size2);
|
||||
p->size = size2;
|
||||
}
|
||||
if (size2 < *size)
|
||||
*size = size2;
|
||||
*buf = p->buf + p->pos;
|
||||
return res;
|
||||
}
|
||||
|
||||
static SRes LookToRead_Look_Exact(void *pp, void **buf, size_t *size)
|
||||
{
|
||||
SRes res = SZ_OK;
|
||||
CLookToRead *p = (CLookToRead *)pp;
|
||||
size_t size2 = p->size - p->pos;
|
||||
if (size2 == 0 && *size > 0)
|
||||
{
|
||||
p->pos = 0;
|
||||
if (*size > LookToRead_BUF_SIZE)
|
||||
*size = LookToRead_BUF_SIZE;
|
||||
res = p->realStream->Read(p->realStream, p->buf, size);
|
||||
size2 = p->size = *size;
|
||||
}
|
||||
if (size2 < *size)
|
||||
*size = size2;
|
||||
*buf = p->buf + p->pos;
|
||||
return res;
|
||||
}
|
||||
|
||||
static SRes LookToRead_Skip(void *pp, size_t offset)
|
||||
{
|
||||
CLookToRead *p = (CLookToRead *)pp;
|
||||
p->pos += offset;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
static SRes LookToRead_Read(void *pp, void *buf, size_t *size)
|
||||
{
|
||||
CLookToRead *p = (CLookToRead *)pp;
|
||||
size_t rem = p->size - p->pos;
|
||||
if (rem == 0)
|
||||
return p->realStream->Read(p->realStream, buf, size);
|
||||
if (rem > *size)
|
||||
rem = *size;
|
||||
memcpy(buf, p->buf + p->pos, rem);
|
||||
p->pos += rem;
|
||||
*size = rem;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
static SRes LookToRead_Seek(void *pp, Int64 *pos, ESzSeek origin)
|
||||
{
|
||||
CLookToRead *p = (CLookToRead *)pp;
|
||||
p->pos = p->size = 0;
|
||||
return p->realStream->Seek(p->realStream, pos, origin);
|
||||
}
|
||||
|
||||
/*
 * Install the CLookToRead callbacks into `p`. A non-zero `lookahead`
 * selects the buffer-filling Look implementation, zero the exact one.
 */
void LookToRead_CreateVTable(CLookToRead *p, int lookahead)
{
  if (lookahead)
    p->s.Look = LookToRead_Look_Lookahead;
  else
    p->s.Look = LookToRead_Look_Exact;
  p->s.Skip = LookToRead_Skip;
  p->s.Read = LookToRead_Read;
  p->s.Seek = LookToRead_Seek;
}

/* Mark the internal buffer as empty. */
void LookToRead_Init(CLookToRead *p)
{
  p->size = 0;
  p->pos = 0;
}
|
||||
|
||||
static SRes SecToLook_Read(void *pp, void *buf, size_t *size)
|
||||
{
|
||||
CSecToLook *p = (CSecToLook *)pp;
|
||||
return LookInStream_LookRead(p->realStream, buf, size);
|
||||
}
|
||||
|
||||
void SecToLook_CreateVTable(CSecToLook *p)
|
||||
{
|
||||
p->s.Read = SecToLook_Read;
|
||||
}
|
||||
|
||||
static SRes SecToRead_Read(void *pp, void *buf, size_t *size)
|
||||
{
|
||||
CSecToRead *p = (CSecToRead *)pp;
|
||||
return p->realStream->Read(p->realStream, buf, size);
|
||||
}
|
||||
|
||||
void SecToRead_CreateVTable(CSecToRead *p)
|
||||
{
|
||||
p->s.Read = SecToRead_Read;
|
||||
}
|
||||
7
vendor/easylzma/src/pavlov/7zVersion.h
vendored
7
vendor/easylzma/src/pavlov/7zVersion.h
vendored
|
|
@ -1,7 +0,0 @@
|
|||
#define MY_VER_MAJOR 4
|
||||
#define MY_VER_MINOR 63
|
||||
#define MY_VER_BUILD 0
|
||||
#define MY_VERSION "4.63"
|
||||
#define MY_DATE "2008-12-31"
|
||||
#define MY_COPYRIGHT ": Igor Pavlov : Public domain"
|
||||
#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " : " MY_DATE
|
||||
127
vendor/easylzma/src/pavlov/Alloc.c
vendored
127
vendor/easylzma/src/pavlov/Alloc.c
vendored
|
|
@ -1,127 +0,0 @@
|
|||
/* Alloc.c -- Memory allocation functions
|
||||
2008-09-24
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "Alloc.h"
|
||||
|
||||
/* #define _SZ_ALLOC_DEBUG */
|
||||
|
||||
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
#include <stdio.h>
|
||||
int g_allocCount = 0;
|
||||
int g_allocCountMid = 0;
|
||||
int g_allocCountBig = 0;
|
||||
#endif
|
||||
|
||||
/*
 * malloc wrapper: a zero-byte request returns a null pointer without
 * calling malloc. With _SZ_ALLOC_DEBUG defined, each allocation is logged
 * to stderr.
 */
void *MyAlloc(size_t size)
{
  void *p;
  if (size == 0)
    return 0;
  p = malloc(size);
  #ifdef _SZ_ALLOC_DEBUG
  fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p);
  #endif
  return p;
}

/*
 * free wrapper. With _SZ_ALLOC_DEBUG defined, freeing a non-null pointer
 * is logged to stderr first.
 */
void MyFree(void *address)
{
  #ifdef _SZ_ALLOC_DEBUG
  if (address != 0)
    fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address);
  #endif
  free(address);
}
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
void *MidAlloc(size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++);
|
||||
#endif
|
||||
return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
}
|
||||
|
||||
void MidFree(void *address)
|
||||
{
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
if (address != 0)
|
||||
fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid);
|
||||
#endif
|
||||
if (address == 0)
|
||||
return;
|
||||
VirtualFree(address, 0, MEM_RELEASE);
|
||||
}
|
||||
|
||||
#ifndef MEM_LARGE_PAGES
|
||||
#undef _7ZIP_LARGE_PAGES
|
||||
#endif
|
||||
|
||||
#ifdef _7ZIP_LARGE_PAGES
|
||||
SIZE_T g_LargePageSize = 0;
|
||||
typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
|
||||
#endif
|
||||
|
||||
/*
 * When built with _7ZIP_LARGE_PAGES, look up GetLargePageMinimum in
 * kernel32.dll at run time and record its result in g_LargePageSize,
 * provided the value is a non-zero power of two. Otherwise does nothing.
 */
void SetLargePageSize()
{
  #ifdef _7ZIP_LARGE_PAGES
  SIZE_T pageSize;
  GetLargePageMinimumP query = (GetLargePageMinimumP)
      GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
  if (query == 0)
    return;
  pageSize = query();
  /* accept only a non-zero power of two */
  if (pageSize != 0 && (pageSize & (pageSize - 1)) == 0)
    g_LargePageSize = pageSize;
  #endif
}
|
||||
|
||||
|
||||
void *BigAlloc(size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++);
|
||||
#endif
|
||||
|
||||
#ifdef _7ZIP_LARGE_PAGES
|
||||
if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18))
|
||||
{
|
||||
void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)),
|
||||
MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
if (res != 0)
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
}
|
||||
|
||||
void BigFree(void *address)
|
||||
{
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
if (address != 0)
|
||||
fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig);
|
||||
#endif
|
||||
|
||||
if (address == 0)
|
||||
return;
|
||||
VirtualFree(address, 0, MEM_RELEASE);
|
||||
}
|
||||
|
||||
#endif
|
||||
32
vendor/easylzma/src/pavlov/Alloc.h
vendored
32
vendor/easylzma/src/pavlov/Alloc.h
vendored
|
|
@ -1,32 +0,0 @@
|
|||
/* Alloc.h -- Memory allocation functions
|
||||
2008-03-13
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
|
||||
#ifndef __COMMON_ALLOC_H
|
||||
#define __COMMON_ALLOC_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
void *MyAlloc(size_t size);
|
||||
void MyFree(void *address);
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
void SetLargePageSize();
|
||||
|
||||
void *MidAlloc(size_t size);
|
||||
void MidFree(void *address);
|
||||
void *BigAlloc(size_t size);
|
||||
void BigFree(void *address);
|
||||
|
||||
#else
|
||||
|
||||
#define MidAlloc(size) MyAlloc(size)
|
||||
#define MidFree(address) MyFree(address)
|
||||
#define BigAlloc(size) MyAlloc(size)
|
||||
#define BigFree(address) MyFree(address)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
132
vendor/easylzma/src/pavlov/Bcj2.c
vendored
132
vendor/easylzma/src/pavlov/Bcj2.c
vendored
|
|
@ -1,132 +0,0 @@
|
|||
/* Bcj2.c -- Converter for x86 code (BCJ2)
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Bcj2.h"
|
||||
|
||||
#ifdef _LZMA_PROB32
|
||||
#define CProb UInt32
|
||||
#else
|
||||
#define CProb UInt16
|
||||
#endif
|
||||
|
||||
#define IsJcc(b0, b1) ((b0) == 0x0F && ((b1) & 0xF0) == 0x80)
|
||||
#define IsJ(b0, b1) ((b1 & 0xFE) == 0xE8 || IsJcc(b0, b1))
|
||||
|
||||
#define kNumTopBits 24
|
||||
#define kTopValue ((UInt32)1 << kNumTopBits)
|
||||
|
||||
#define kNumBitModelTotalBits 11
|
||||
#define kBitModelTotal (1 << kNumBitModelTotalBits)
|
||||
#define kNumMoveBits 5
|
||||
|
||||
#define RC_READ_BYTE (*buffer++)
|
||||
#define RC_TEST { if (buffer == bufferLim) return SZ_ERROR_DATA; }
|
||||
#define RC_INIT2 code = 0; range = 0xFFFFFFFF; \
|
||||
{ int i; for (i = 0; i < 5; i++) { RC_TEST; code = (code << 8) | RC_READ_BYTE; }}
|
||||
|
||||
#define NORMALIZE if (range < kTopValue) { RC_TEST; range <<= 8; code = (code << 8) | RC_READ_BYTE; }
|
||||
|
||||
#define IF_BIT_0(p) ttt = *(p); bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
|
||||
#define UPDATE_0(p) range = bound; *(p) = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); NORMALIZE;
|
||||
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CProb)(ttt - (ttt >> kNumMoveBits)); NORMALIZE;
|
||||
|
||||
int Bcj2_Decode(
|
||||
const Byte *buf0, SizeT size0,
|
||||
const Byte *buf1, SizeT size1,
|
||||
const Byte *buf2, SizeT size2,
|
||||
const Byte *buf3, SizeT size3,
|
||||
Byte *outBuf, SizeT outSize)
|
||||
{
|
||||
CProb p[256 + 2];
|
||||
SizeT inPos = 0, outPos = 0;
|
||||
|
||||
const Byte *buffer, *bufferLim;
|
||||
UInt32 range, code;
|
||||
Byte prevByte = 0;
|
||||
|
||||
unsigned int i;
|
||||
for (i = 0; i < sizeof(p) / sizeof(p[0]); i++)
|
||||
p[i] = kBitModelTotal >> 1;
|
||||
|
||||
buffer = buf3;
|
||||
bufferLim = buffer + size3;
|
||||
RC_INIT2
|
||||
|
||||
if (outSize == 0)
|
||||
return SZ_OK;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
Byte b;
|
||||
CProb *prob;
|
||||
UInt32 bound;
|
||||
UInt32 ttt;
|
||||
|
||||
SizeT limit = size0 - inPos;
|
||||
if (outSize - outPos < limit)
|
||||
limit = outSize - outPos;
|
||||
while (limit != 0)
|
||||
{
|
||||
Byte b = buf0[inPos];
|
||||
outBuf[outPos++] = b;
|
||||
if (IsJ(prevByte, b))
|
||||
break;
|
||||
inPos++;
|
||||
prevByte = b;
|
||||
limit--;
|
||||
}
|
||||
|
||||
if (limit == 0 || outPos == outSize)
|
||||
break;
|
||||
|
||||
b = buf0[inPos++];
|
||||
|
||||
if (b == 0xE8)
|
||||
prob = p + prevByte;
|
||||
else if (b == 0xE9)
|
||||
prob = p + 256;
|
||||
else
|
||||
prob = p + 257;
|
||||
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob)
|
||||
prevByte = b;
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 dest;
|
||||
const Byte *v;
|
||||
UPDATE_1(prob)
|
||||
if (b == 0xE8)
|
||||
{
|
||||
v = buf1;
|
||||
if (size1 < 4)
|
||||
return SZ_ERROR_DATA;
|
||||
buf1 += 4;
|
||||
size1 -= 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
v = buf2;
|
||||
if (size2 < 4)
|
||||
return SZ_ERROR_DATA;
|
||||
buf2 += 4;
|
||||
size2 -= 4;
|
||||
}
|
||||
dest = (((UInt32)v[0] << 24) | ((UInt32)v[1] << 16) |
|
||||
((UInt32)v[2] << 8) | ((UInt32)v[3])) - ((UInt32)outPos + 4);
|
||||
outBuf[outPos++] = (Byte)dest;
|
||||
if (outPos == outSize)
|
||||
break;
|
||||
outBuf[outPos++] = (Byte)(dest >> 8);
|
||||
if (outPos == outSize)
|
||||
break;
|
||||
outBuf[outPos++] = (Byte)(dest >> 16);
|
||||
if (outPos == outSize)
|
||||
break;
|
||||
outBuf[outPos++] = prevByte = (Byte)(dest >> 24);
|
||||
}
|
||||
}
|
||||
return (outPos == outSize) ? SZ_OK : SZ_ERROR_DATA;
|
||||
}
|
||||
30
vendor/easylzma/src/pavlov/Bcj2.h
vendored
30
vendor/easylzma/src/pavlov/Bcj2.h
vendored
|
|
@ -1,30 +0,0 @@
|
|||
/* Bcj2.h -- Converter for x86 code (BCJ2)
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __BCJ2_H
|
||||
#define __BCJ2_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
/*
|
||||
Conditions:
|
||||
outSize <= FullOutputSize,
|
||||
where FullOutputSize is full size of output stream of x86_2 filter.
|
||||
|
||||
If buf0 overlaps outBuf, there are two required conditions:
|
||||
1) (buf0 >= outBuf)
|
||||
2) (buf0 + size0 >= outBuf + FullOutputSize).
|
||||
|
||||
Returns:
|
||||
SZ_OK
|
||||
SZ_ERROR_DATA - Data error
|
||||
*/
|
||||
|
||||
int Bcj2_Decode(
|
||||
const Byte *buf0, SizeT size0,
|
||||
const Byte *buf1, SizeT size1,
|
||||
const Byte *buf2, SizeT size2,
|
||||
const Byte *buf3, SizeT size3,
|
||||
Byte *outBuf, SizeT outSize);
|
||||
|
||||
#endif
|
||||
133
vendor/easylzma/src/pavlov/Bra.c
vendored
133
vendor/easylzma/src/pavlov/Bra.c
vendored
|
|
@ -1,133 +0,0 @@
|
|||
/* Bra.c -- Converters for RISC code
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Bra.h"
|
||||
|
||||
/*
 * Convert the 24-bit word offsets of 4-byte words whose top byte is 0xEB
 * between relative and absolute form, in place.
 * A non-zero `encoding` adds the instruction address (ip + 8 + offset in
 * the buffer); zero subtracts it.
 * Returns the number of bytes processed (0 when size < 4).
 */
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  SizeT i;
  if (size < 4)
    return 0;
  size -= 4;
  ip += 8;
  for (i = 0; i <= size; i += 4)
  {
    Byte *ins = data + i;
    UInt32 src, dest;

    if (ins[3] != 0xEB)
      continue;

    src = ((UInt32)ins[2] << 16) | ((UInt32)ins[1] << 8) | (ins[0]);
    src <<= 2;
    dest = encoding ? (ip + (UInt32)i + src) : (src - (ip + (UInt32)i));
    dest >>= 2;

    ins[0] = (Byte)dest;
    ins[1] = (Byte)(dest >> 8);
    ins[2] = (Byte)(dest >> 16);
  }
  return i;
}
|
||||
|
||||
/*
 * Convert the offsets of 4-byte halfword pairs matching the 0xF0 / 0xF8
 * high-byte pattern between relative and absolute form, in place.
 * The buffer is scanned in 2-byte steps; a converted pair is skipped whole.
 * A non-zero `encoding` adds the instruction address (ip + 4 + offset in
 * the buffer); zero subtracts it.
 * Returns the number of bytes processed (0 when size < 4).
 */
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  SizeT i;
  if (size < 4)
    return 0;
  size -= 4;
  ip += 4;
  for (i = 0; i <= size; i += 2)
  {
    Byte *ins = data + i;
    UInt32 src, dest;

    if ((ins[1] & 0xF8) != 0xF0 || (ins[3] & 0xF8) != 0xF8)
      continue;

    /* 22-bit offset: 11 bits from each halfword */
    src =
      (((UInt32)ins[1] & 0x7) << 19) |
      ((UInt32)ins[0] << 11) |
      (((UInt32)ins[3] & 0x7) << 8) |
      (ins[2]);

    src <<= 1;
    dest = encoding ? (ip + (UInt32)i + src) : (src - (ip + (UInt32)i));
    dest >>= 1;

    ins[0] = (Byte)(dest >> 11);
    ins[1] = (Byte)(0xF0 | ((dest >> 19) & 0x7));
    ins[2] = (Byte)dest;
    ins[3] = (Byte)(0xF8 | ((dest >> 8) & 0x7));

    /* step over the second halfword of the pair just converted */
    i += 2;
  }
  return i;
}
|
||||
|
||||
/*
 * Convert the offsets of big-endian 4-byte words whose top six bits are
 * 0x12 and whose low two bits are 01 between relative and absolute form,
 * in place.
 * A non-zero `encoding` adds the instruction address (ip + offset in the
 * buffer); zero subtracts it.
 * Returns the number of bytes processed (0 when size < 4).
 */
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  SizeT i;
  if (size < 4)
    return 0;
  size -= 4;
  for (i = 0; i <= size; i += 4)
  {
    Byte *ins = data + i;
    UInt32 src, dest;

    if ((ins[0] >> 2) != 0x12 || (ins[3] & 3) != 1)
      continue;

    src = ((UInt32)(ins[0] & 3) << 24) |
      ((UInt32)ins[1] << 16) |
      ((UInt32)ins[2] << 8) |
      ((UInt32)ins[3] & (~3));

    dest = encoding ? (ip + (UInt32)i + src) : (src - (ip + (UInt32)i));

    ins[0] = (Byte)(0x48 | ((dest >> 24) & 0x3));
    ins[1] = (Byte)(dest >> 16);
    ins[2] = (Byte)(dest >> 8);
    /* keep the two low flag bits of the last byte and OR in the new offset bits */
    ins[3] = (Byte)((ins[3] & 0x3) | dest);
  }
  return i;
}
|
||||
|
||||
/*
 * Convert the displacements of big-endian 4-byte words that start with
 * 0x40 00xxxxxx or 0x7F 11xxxxxx between relative and absolute form,
 * in place.
 * A non-zero `encoding` adds the instruction address (ip + offset in the
 * buffer); zero subtracts it.
 * Returns the number of bytes processed (0 when size < 4).
 */
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  /* SizeT, like the other converters in this file: a UInt32 counter
     compared against a SizeT limit can wrap where SizeT is wider. */
  SizeT i;
  if (size < 4)
    return 0;
  size -= 4;
  for (i = 0; i <= size; i += 4)
  {
    if ((data[i] == 0x40 && (data[i + 1] & 0xC0) == 0x00) ||
        (data[i] == 0x7F && (data[i + 1] & 0xC0) == 0xC0))
    {
      UInt32 src =
        ((UInt32)data[i + 0] << 24) |
        ((UInt32)data[i + 1] << 16) |
        ((UInt32)data[i + 2] << 8) |
        ((UInt32)data[i + 3]);
      UInt32 dest;

      src <<= 2;
      if (encoding)
        dest = ip + (UInt32)i + src;
      else
        dest = src - (ip + (UInt32)i);
      dest >>= 2;

      /* sign-extend bit 22 into bits 22..29 and set bit 30 */
      dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000;

      data[i + 0] = (Byte)(dest >> 24);
      data[i + 1] = (Byte)(dest >> 16);
      data[i + 2] = (Byte)(dest >> 8);
      data[i + 3] = (Byte)dest;
    }
  }
  return i;
}
|
||||
60
vendor/easylzma/src/pavlov/Bra.h
vendored
60
vendor/easylzma/src/pavlov/Bra.h
vendored
|
|
@ -1,60 +0,0 @@
|
|||
/* Bra.h -- Branch converters for executables
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __BRA_H
|
||||
#define __BRA_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
/*
|
||||
These functions convert relative addresses to absolute addresses
|
||||
in CALL instructions to increase the compression ratio.
|
||||
|
||||
In:
|
||||
data - data buffer
|
||||
size - size of data
|
||||
ip - current virtual Instruction Pointer (IP) value
|
||||
state - state variable for x86 converter
|
||||
encoding - 0 (for decoding), 1 (for encoding)
|
||||
|
||||
Out:
|
||||
state - state variable for x86 converter
|
||||
|
||||
Returns:
|
||||
The number of processed bytes. If you call these functions with multiple calls,
|
||||
you must start next call with first byte after block of processed bytes.
|
||||
|
||||
Type Endian Alignment LookAhead
|
||||
|
||||
x86 little 1 4
|
||||
ARMT little 2 2
|
||||
ARM little 4 0
|
||||
PPC big 4 0
|
||||
SPARC big 4 0
|
||||
IA64 little 16 0
|
||||
|
||||
size must be >= Alignment + LookAhead, if it's not last block.
|
||||
If (size < Alignment + LookAhead), converter returns 0.
|
||||
|
||||
Example:
|
||||
|
||||
UInt32 ip = 0;
|
||||
for ()
|
||||
{
|
||||
; size must be >= Alignment + LookAhead, if it's not last block
|
||||
SizeT processed = Convert(data, size, ip, 1);
|
||||
data += processed;
|
||||
size -= processed;
|
||||
ip += processed;
|
||||
}
|
||||
*/
|
||||
|
||||
#define x86_Convert_Init(state) { state = 0; }
|
||||
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
|
||||
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
|
||||
#endif
|
||||
85
vendor/easylzma/src/pavlov/Bra86.c
vendored
85
vendor/easylzma/src/pavlov/Bra86.c
vendored
|
|
@ -1,85 +0,0 @@
|
|||
/* Bra86.c -- Converter for x86 code (BCJ)
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Bra.h"
|
||||
|
||||
#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF)
|
||||
|
||||
const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0};
|
||||
const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3};
|
||||
|
||||
/*
  x86_Convert -- branch converter (BCJ) for x86 code.

  Looks for opcode bytes 0xE8 / 0xE9 followed by a 32-bit little-endian
  operand whose most significant byte is 0x00 or 0xFF, and rewrites that
  operand:
    encoding != 0 : dest = (ip + pos + 5) + src   (relative -> absolute)
    encoding == 0 : dest = src - (ip + pos + 5)   (absolute -> relative)

  `*state` carries a 3-bit history mask of recently seen opcode bytes across
  calls; together with kMaskToAllowedStatus / kMaskToBitNumber it decides
  whether a candidate that overlaps a previous one may be converted.

  The last 4 bytes of the buffer are never treated as an opcode (look-ahead).
  Returns the number of bytes processed, or 0 if size < 5.
*/
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
{
  SizeT pos = 0;
  SizeT prevPos;
  UInt32 mask = *state & 0x7;
  Byte *end;

  if (size < 5)
    return 0;
  ip += 5;
  prevPos = (SizeT)0 - 1;
  end = data + size - 4;

  for (;;)
  {
    Byte *op = data + pos;

    /* advance to the next 0xE8/0xE9 byte, or to the look-ahead limit */
    while (op < end && (*op & 0xFE) != 0xE8)
      op++;
    pos = (SizeT)(op - data);
    if (op >= end)
      break;

    /* distance from the previous candidate decides how much history survives */
    prevPos = pos - prevPos;
    if (prevPos > 3)
      mask = 0;
    else
    {
      mask = (mask << ((int)prevPos - 1)) & 0x7;
      if (mask != 0)
      {
        Byte probe = op[4 - kMaskToBitNumber[mask]];
        if (!kMaskToAllowedStatus[mask] || Test86MSByte(probe))
        {
          /* rejected: remember this opcode byte and move on by one */
          prevPos = pos;
          mask = ((mask << 1) & 0x7) | 1;
          pos++;
          continue;
        }
      }
    }
    prevPos = pos;

    if (!Test86MSByte(op[4]))
    {
      /* operand's top byte is neither 0x00 nor 0xFF: not converted */
      mask = ((mask << 1) & 0x7) | 1;
      pos++;
      continue;
    }

    {
      UInt32 src = ((UInt32)op[4] << 24) | ((UInt32)op[3] << 16) | ((UInt32)op[2] << 8) | ((UInt32)op[1]);
      UInt32 dest;
      for (;;)
      {
        Byte b;
        int index;
        if (encoding)
          dest = (ip + (UInt32)pos) + src;
        else
          dest = src - (ip + (UInt32)pos);
        if (mask == 0)
          break;
        index = kMaskToBitNumber[mask] * 8;
        b = (Byte)(dest >> (24 - index));
        if (!Test86MSByte(b))
          break;
        src = dest ^ ((1 << (32 - index)) - 1);
      }
      /* top byte becomes 0x00 or 0xFF depending on bit 24 of dest */
      op[4] = (Byte)(~(((dest >> 24) & 1) - 1));
      op[3] = (Byte)(dest >> 16);
      op[2] = (Byte)(dest >> 8);
      op[1] = (Byte)dest;
      pos += 5;
    }
  }

  prevPos = pos - prevPos;
  *state = ((prevPos > 3) ? 0 : ((mask << ((int)prevPos - 1)) & 0x7));
  return pos;
}
|
||||
67
vendor/easylzma/src/pavlov/BraIA64.c
vendored
67
vendor/easylzma/src/pavlov/BraIA64.c
vendored
|
|
@ -1,67 +0,0 @@
|
|||
/* BraIA64.c -- Converter for IA-64 code
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Bra.h"
|
||||
|
||||
static const Byte kBranchTable[32] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
4, 4, 6, 6, 0, 0, 7, 7,
|
||||
4, 4, 0, 0, 4, 4, 0, 0
|
||||
};
|
||||
|
||||
/*
  IA64_Convert -- branch converter for IA-64 code.

  Processes `data` in 16-byte bundles. The low 5 bits of a bundle's first
  byte select an entry of kBranchTable; bit `slot` of that entry says
  whether the 41-bit slot (starting at bit 5 + 41 * slot) is examined.
  A slot is converted when bits 37..40 equal 5 and bits 9..11 are zero.
  Its 21-bit immediate (bits 13..32 plus bit 36), scaled by 16, is rewritten:
    encoding != 0 : dest = (ip + offset) + src   (relative -> absolute)
    encoding == 0 : dest = src - (ip + offset)   (absolute -> relative)

  Returns the number of bytes processed (a multiple of 16), or 0 if size < 16.
*/
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
  SizeT off;
  SizeT last;

  if (size < 16)
    return 0;
  last = size - 16;

  for (off = 0; off <= last; off += 16)
  {
    Byte *bundle = data + off;
    UInt32 slotMask = kBranchTable[bundle[0] & 0x1F];
    int slot;

    for (slot = 0; slot < 3; slot++)
    {
      UInt32 bitPos = 5 + 41 * (UInt32)slot;
      UInt32 bytePos = bitPos >> 3;
      UInt32 bitRes = bitPos & 0x7;
      UInt64 raw = 0;
      UInt64 norm;
      int k;

      if (((slotMask >> slot) & 1) == 0)
        continue;

      /* load the 6 little-endian bytes that contain this slot */
      for (k = 0; k < 6; k++)
        raw += (UInt64)bundle[bytePos + k] << (8 * k);

      norm = raw >> bitRes;
      if (!(((norm >> 37) & 0xF) == 0x5 && ((norm >> 9) & 0x7) == 0))
        continue;

      {
        UInt32 src = (UInt32)((norm >> 13) & 0xFFFFF);
        UInt32 dest;
        src |= ((UInt32)(norm >> 36) & 1) << 20;
        src <<= 4;

        if (encoding)
          dest = ip + (UInt32)off + src;
        else
          dest = src - (ip + (UInt32)off);
        dest >>= 4;

        /* clear the old immediate bits, then insert the new ones */
        norm &= ~((UInt64)(0x8FFFFF) << 13);
        norm |= ((UInt64)(dest & 0xFFFFF) << 13);
        norm |= ((UInt64)(dest & 0x100000) << (36 - 20));

        /* keep the bits below the slot start, replace everything above */
        raw &= (1 << bitRes) - 1;
        raw |= (norm << bitRes);
        for (k = 0; k < 6; k++)
          bundle[bytePos + k] = (Byte)(raw >> (8 * k));
      }
    }
  }
  return off;
}
|
||||
69
vendor/easylzma/src/pavlov/CpuArch.h
vendored
69
vendor/easylzma/src/pavlov/CpuArch.h
vendored
|
|
@ -1,69 +0,0 @@
|
|||
/* CpuArch.h
|
||||
2008-08-05
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
|
||||
#ifndef __CPUARCH_H
|
||||
#define __CPUARCH_H
|
||||
|
||||
/*
|
||||
LITTLE_ENDIAN_UNALIGN means:
|
||||
1) CPU is LITTLE_ENDIAN
|
||||
2) it's allowed to make unaligned memory accesses
|
||||
if LITTLE_ENDIAN_UNALIGN is not defined, it means that we don't know
|
||||
about these properties of platform.
|
||||
*/
|
||||
|
||||
#if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386__) || defined(__x86_64__)
|
||||
#define LITTLE_ENDIAN_UNALIGN
|
||||
#endif
|
||||
|
||||
#ifdef LITTLE_ENDIAN_UNALIGN
|
||||
|
||||
#define GetUi16(p) (*(const UInt16 *)(p))
|
||||
#define GetUi32(p) (*(const UInt32 *)(p))
|
||||
#define GetUi64(p) (*(const UInt64 *)(p))
|
||||
#define SetUi32(p, d) *(UInt32 *)(p) = (d);
|
||||
|
||||
#else
|
||||
|
||||
#define GetUi16(p) (((const Byte *)(p))[0] | ((UInt16)((const Byte *)(p))[1] << 8))
|
||||
|
||||
#define GetUi32(p) ( \
|
||||
((const Byte *)(p))[0] | \
|
||||
((UInt32)((const Byte *)(p))[1] << 8) | \
|
||||
((UInt32)((const Byte *)(p))[2] << 16) | \
|
||||
((UInt32)((const Byte *)(p))[3] << 24))
|
||||
|
||||
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
|
||||
|
||||
#define SetUi32(p, d) { UInt32 _x_ = (d); \
|
||||
((Byte *)(p))[0] = (Byte)_x_; \
|
||||
((Byte *)(p))[1] = (Byte)(_x_ >> 8); \
|
||||
((Byte *)(p))[2] = (Byte)(_x_ >> 16); \
|
||||
((Byte *)(p))[3] = (Byte)(_x_ >> 24); }
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(LITTLE_ENDIAN_UNALIGN) && defined(_WIN64) && (_MSC_VER >= 1300)
|
||||
|
||||
#pragma intrinsic(_byteswap_ulong)
|
||||
#pragma intrinsic(_byteswap_uint64)
|
||||
#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
|
||||
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
|
||||
|
||||
#else
|
||||
|
||||
#define GetBe32(p) ( \
|
||||
((UInt32)((const Byte *)(p))[0] << 24) | \
|
||||
((UInt32)((const Byte *)(p))[1] << 16) | \
|
||||
((UInt32)((const Byte *)(p))[2] << 8) | \
|
||||
((const Byte *)(p))[3] )
|
||||
|
||||
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
|
||||
|
||||
#endif
|
||||
|
||||
#define GetBe16(p) (((UInt16)((const Byte *)(p))[0] << 8) | ((const Byte *)(p))[1])
|
||||
|
||||
#endif
|
||||
751
vendor/easylzma/src/pavlov/LzFind.c
vendored
751
vendor/easylzma/src/pavlov/LzFind.c
vendored
|
|
@ -1,751 +0,0 @@
|
|||
/* LzFind.c -- Match finder for LZ algorithms
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "LzFind.h"
|
||||
#include "LzHash.h"
|
||||
|
||||
#define kEmptyHashValue 0
|
||||
#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
|
||||
#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
|
||||
#define kNormalizeMask (~(kNormalizeStepMin - 1))
|
||||
#define kMaxHistorySize ((UInt32)3 << 30)
|
||||
|
||||
#define kStartMaxLen 3
|
||||
|
||||
/* Releases the window buffer through `alloc` and clears the pointer.
   Does nothing when p->directInput is set. */
static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
{
  if (p->directInput)
    return;
  alloc->Free(alloc, p->bufferBase);
  p->bufferBase = 0;
}
|
||||
|
||||
/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G */
|
||||
|
||||
/* Makes sure the window buffer holds
   keepSizeBefore + keepSizeAfter + keepSizeReserv bytes.
   In directInput mode only the size is recorded. Otherwise an existing
   buffer of exactly that size is reused; any other buffer is freed and a
   new one is allocated. Returns 1 on success, 0 if allocation failed. */
static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
{
  const UInt32 wanted = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;

  if (p->directInput)
  {
    p->blockSize = wanted;
    return 1;
  }

  /* current buffer already has the right size */
  if (p->bufferBase != 0 && p->blockSize == wanted)
    return 1;

  LzInWindow_Free(p, alloc);
  p->blockSize = wanted;
  p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)wanted);
  return (p->bufferBase != 0);
}
|
||||
|
||||
/* Returns the pointer to the byte at the current position in the window. */
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p)
{
  return p->buffer;
}
|
||||
/* Returns the byte at signed offset `index` from the current position. */
Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index)
{
  return *(p->buffer + index);
}
|
||||
|
||||
/* Returns how many bytes have been read but not yet consumed
   (streamPos - pos). */
UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p)
{
  UInt32 avail = p->streamPos - p->pos;
  return avail;
}
|
||||
|
||||
/* Subtracts `subValue` from the three position counters (pos, streamPos,
   posLimit) so that their differences stay the same. */
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
{
  p->pos -= subValue;
  p->streamPos -= subValue;
  p->posLimit -= subValue;
}
|
||||
|
||||
/* Reads from p->stream into the free tail of the window until more than
   keepSizeAfter bytes are available ahead of the current position.
   Stops early when the window is full, when Read reports an error (kept in
   p->result), or when Read delivers 0 bytes (streamEndWasReached is set).
   Does nothing if the stream already ended or a previous error is pending. */
static void MatchFinder_ReadBlock(CMatchFinder *p)
{
  if (p->streamEndWasReached || p->result != SZ_OK)
    return;

  do
  {
    Byte *tail = p->buffer + (p->streamPos - p->pos);
    size_t room = (size_t)(p->bufferBase + p->blockSize - tail);

    if (room == 0)
      return;

    /* Read updates `room` to the number of bytes actually delivered */
    p->result = p->stream->Read(p->stream, tail, &room);
    if (p->result != SZ_OK)
      return;
    if (room == 0)
    {
      p->streamEndWasReached = 1;
      return;
    }
    p->streamPos += (UInt32)room;
  }
  while (p->streamPos - p->pos <= p->keepSizeAfter);
}
|
||||
|
||||
/* Slides the live part of the window (keepSizeBefore bytes of history plus
   all unread bytes) to the start of the buffer and repositions p->buffer. */
void MatchFinder_MoveBlock(CMatchFinder *p)
{
  Byte *keepFrom = p->buffer - p->keepSizeBefore;
  size_t keepLen = (size_t)(p->streamPos - p->pos + p->keepSizeBefore);

  memmove(p->bufferBase, keepFrom, keepLen);
  p->buffer = p->bufferBase + p->keepSizeBefore;
}
|
||||
|
||||
/* Returns non-zero when the space between the current position and the end
   of the buffer is keepSizeAfter bytes or less. */
int MatchFinder_NeedMove(CMatchFinder *p)
{
  size_t room = (size_t)(p->bufferBase + p->blockSize - p->buffer);
  return (room <= p->keepSizeAfter);
}
|
||||
|
||||
/* Refills the window when the stream has not ended and no more than
   keepSizeAfter bytes are available ahead of the current position. */
void MatchFinder_ReadIfRequired(CMatchFinder *p)
{
  if (!p->streamEndWasReached && p->streamPos - p->pos <= p->keepSizeAfter)
    MatchFinder_ReadBlock(p);
}
|
||||
|
||||
/* Slides the window first if it is close to the end of the buffer, then
   reads more input. */
static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
{
  if (MatchFinder_NeedMove(p))
  {
    MatchFinder_MoveBlock(p);
  }
  MatchFinder_ReadBlock(p);
}
|
||||
|
||||
/* Default tuning: binary-tree mode, 4 hash bytes, cut value 32,
   no direct input, bigHash off. */
static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
{
  p->btMode = 1;
  p->numHashBytes = 4;
  p->cutValue = 32;
  p->directInput = 0;
  p->bigHash = 0;
}
|
||||
|
||||
#define kCrcPoly 0xEDB88320
|
||||
|
||||
/* Puts a CMatchFinder into its initial state: no buffers allocated, default
   settings applied, and p->crc filled with the 256-entry table of the
   reflected CRC polynomial kCrcPoly. */
void MatchFinder_Construct(CMatchFinder *p)
{
  UInt32 n;

  p->bufferBase = 0;
  p->directInput = 0;
  p->hash = 0;
  MatchFinder_SetDefaultSettings(p);

  for (n = 0; n < 256; n++)
  {
    UInt32 r = n;
    int bit;
    for (bit = 0; bit < 8; bit++)
    {
      /* shift right; xor in the polynomial when the dropped bit was 1 */
      if (r & 1)
        r = (r >> 1) ^ kCrcPoly;
      else
        r >>= 1;
    }
    p->crc[n] = r;
  }
}
|
||||
|
||||
/* Frees the hash/son table (one allocation) and clears the pointer. */
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
{
  CLzRef *table = p->hash;
  p->hash = 0;
  alloc->Free(alloc, table);
}
|
||||
|
||||
/* Releases everything the match finder owns: first the hash/son table,
   then the window buffer. */
void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
{
  MatchFinder_FreeThisClassMemory(p, alloc);
  LzInWindow_Free(p, alloc);
}
|
||||
|
||||
/* Allocates an array of `num` CLzRef entries through `alloc`.
   Returns 0 if num * sizeof(CLzRef) does not fit in size_t, or whatever
   alloc->Alloc returns otherwise. */
static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
{
  const size_t bytes = (size_t)num * sizeof(CLzRef);

  /* dividing back detects multiplication overflow */
  if (bytes / sizeof(CLzRef) != num)
    return 0;
  return (CLzRef *)alloc->Alloc(alloc, bytes);
}
|
||||
|
||||
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
|
||||
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
|
||||
ISzAlloc *alloc)
|
||||
{
|
||||
UInt32 sizeReserv;
|
||||
if (historySize > kMaxHistorySize)
|
||||
{
|
||||
MatchFinder_Free(p, alloc);
|
||||
return 0;
|
||||
}
|
||||
sizeReserv = historySize >> 1;
|
||||
if (historySize > ((UInt32)2 << 30))
|
||||
sizeReserv = historySize >> 2;
|
||||
sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
|
||||
|
||||
p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
|
||||
p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
|
||||
/* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
|
||||
if (LzInWindow_Create(p, sizeReserv, alloc))
|
||||
{
|
||||
UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1;
|
||||
UInt32 hs;
|
||||
p->matchMaxLen = matchMaxLen;
|
||||
{
|
||||
p->fixedHashSize = 0;
|
||||
if (p->numHashBytes == 2)
|
||||
hs = (1 << 16) - 1;
|
||||
else
|
||||
{
|
||||
hs = historySize - 1;
|
||||
hs |= (hs >> 1);
|
||||
hs |= (hs >> 2);
|
||||
hs |= (hs >> 4);
|
||||
hs |= (hs >> 8);
|
||||
hs >>= 1;
|
||||
/* hs >>= p->skipModeBits; */
|
||||
hs |= 0xFFFF; /* don't change it! It's required for Deflate */
|
||||
if (hs > (1 << 24))
|
||||
{
|
||||
if (p->numHashBytes == 3)
|
||||
hs = (1 << 24) - 1;
|
||||
else
|
||||
hs >>= 1;
|
||||
}
|
||||
}
|
||||
p->hashMask = hs;
|
||||
hs++;
|
||||
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
|
||||
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
|
||||
if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
|
||||
hs += p->fixedHashSize;
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 prevSize = p->hashSizeSum + p->numSons;
|
||||
UInt32 newSize;
|
||||
p->historySize = historySize;
|
||||
p->hashSizeSum = hs;
|
||||
p->cyclicBufferSize = newCyclicBufferSize;
|
||||
p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
|
||||
newSize = p->hashSizeSum + p->numSons;
|
||||
if (p->hash != 0 && prevSize == newSize)
|
||||
return 1;
|
||||
MatchFinder_FreeThisClassMemory(p, alloc);
|
||||
p->hash = AllocRefs(newSize, alloc);
|
||||
if (p->hash != 0)
|
||||
{
|
||||
p->son = p->hash + p->hashSizeSum;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
MatchFinder_Free(p, alloc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Recomputes p->lenLimit and p->posLimit.
   posLimit is pos plus the smallest of:
     - the distance to kMaxValForNormalize,
     - the distance to the end of the cyclic buffer,
     - the available bytes beyond keepSizeAfter (or 1 if some bytes are
       available but not more than keepSizeAfter, or 0 if none are).
   lenLimit is the number of available bytes, capped at matchMaxLen. */
static void MatchFinder_SetLimits(CMatchFinder *p)
{
  const UInt32 avail = p->streamPos - p->pos;
  UInt32 step = kMaxValForNormalize - p->pos;
  UInt32 cand = p->cyclicBufferSize - p->cyclicBufferPos;

  if (cand < step)
    step = cand;

  if (avail > p->keepSizeAfter)
    cand = avail - p->keepSizeAfter;
  else
    cand = (avail > 0) ? 1 : 0;
  if (cand < step)
    step = cand;

  p->lenLimit = (avail > p->matchMaxLen) ? p->matchMaxLen : avail;
  p->posLimit = p->pos + step;
}
|
||||
|
||||
/* Prepares the match finder for a new stream: clears the first hashSizeSum
   table entries, rewinds the window and the cyclic buffer, starts pos and
   streamPos at cyclicBufferSize, resets error/end-of-stream state, then
   reads the first block and computes the limits. */
void MatchFinder_Init(CMatchFinder *p)
{
  CLzRef *slot = p->hash;
  UInt32 left = p->hashSizeSum;

  while (left != 0)
  {
    *slot++ = kEmptyHashValue;
    left--;
  }

  p->cyclicBufferPos = 0;
  p->buffer = p->bufferBase;
  p->streamPos = p->cyclicBufferSize;
  p->pos = p->streamPos;
  p->result = SZ_OK;
  p->streamEndWasReached = 0;

  MatchFinder_ReadBlock(p);
  MatchFinder_SetLimits(p);
}
|
||||
|
||||
/* Returns the amount by which positions are reduced during normalization:
   (pos - historySize - 1) rounded down to a multiple of kNormalizeStepMin. */
static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
{
  UInt32 excess = p->pos - p->historySize - 1;
  return excess & kNormalizeMask;
}
|
||||
|
||||
/* Rebases `numItems` stored positions by `subValue`: entries that are not
   greater than subValue become kEmptyHashValue, all others are decreased
   by subValue. */
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
{
  UInt32 left;
  for (left = numItems; left != 0; left--, items++)
  {
    UInt32 v = *items;
    *items = (v <= subValue) ? kEmptyHashValue : (v - subValue);
  }
}
|
||||
|
||||
/* Rebases every entry of the hash and son tables and the position counters
   by the same sub-value. */
static void MatchFinder_Normalize(CMatchFinder *p)
{
  const UInt32 sub = MatchFinder_GetSubValue(p);
  const UInt32 total = p->hashSizeSum + p->numSons;

  MatchFinder_Normalize3(sub, p->hash, total);
  MatchFinder_ReduceOffsets(p, sub);
}
|
||||
|
||||
/* Called when pos reaches posLimit. Handles, in this order: position
   normalization, window refill, cyclic buffer wrap-around, and finally
   recomputes the limits. */
static void MatchFinder_CheckLimits(CMatchFinder *p)
{
  if (p->pos == kMaxValForNormalize)
  {
    MatchFinder_Normalize(p);
  }

  /* exactly keepSizeAfter bytes left and more input may exist */
  if (!p->streamEndWasReached && p->streamPos - p->pos == p->keepSizeAfter)
  {
    MatchFinder_CheckAndMoveAndRead(p);
  }

  if (p->cyclicBufferPos == p->cyclicBufferSize)
  {
    p->cyclicBufferPos = 0;
  }

  MatchFinder_SetLimits(p);
}
|
||||
|
||||
/*
  Hash-chain match search.

  Links the current position into the chain (son[_cyclicBufferPos] =
  curMatch) and then walks at most `cutValue` chain entries, stopping as
  soon as a candidate lies _cyclicBufferSize or more positions back.
  Every candidate longer than the best length so far (initially `maxLen`)
  is appended to `distances` as a (length, distance - 1) pair; the walk ends
  early once a match of `lenLimit` bytes is found.

  Returns the pointer just past the last pair written.
*/
static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
    UInt32 *distances, UInt32 maxLen)
{
  son[_cyclicBufferPos] = curMatch;

  while (cutValue-- != 0)
  {
    const UInt32 delta = pos - curMatch;
    const Byte *cand;

    if (delta >= _cyclicBufferSize)
      break;

    cand = cur - delta;
    /* follow the chain now; the candidate is examined below */
    curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];

    /* quick rejection: the byte at the current best length must match */
    if (cand[maxLen] == cur[maxLen] && *cand == *cur)
    {
      UInt32 len;
      for (len = 1; len != lenLimit && cand[len] == cur[len]; len++)
        ;
      if (maxLen < len)
      {
        maxLen = len;
        *distances++ = len;
        *distances++ = delta - 1;
        if (len == lenLimit)
          return distances;
      }
    }
  }
  return distances;
}
|
||||
|
||||
/*
  Binary-tree match search with insertion.

  Each cyclic-buffer position owns a pair of links in `son`: pair[0] and
  pair[1]. Starting from `curMatch`, the tree is descended for at most
  `cutValue` steps (or until a candidate lies _cyclicBufferSize or more
  positions back). While descending, the tree is re-linked so that the
  current position becomes the new root: candidates that compare lower than
  the current string are attached through `lowLink`, the others through
  `highLink`.

  Every candidate longer than the best length so far (initially `maxLen`)
  is appended to `distances` as a (length, distance - 1) pair. When such a
  new best match reaches `lenLimit`, the candidate's own links are adopted
  and the search ends.

  Returns the pointer just past the last pair written.
*/
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
    UInt32 *distances, UInt32 maxLen)
{
  CLzRef *highLink = son + (_cyclicBufferPos << 1) + 1;
  CLzRef *lowLink = son + (_cyclicBufferPos << 1);
  UInt32 highLen = 0;
  UInt32 lowLen = 0;

  for (;;)
  {
    const UInt32 delta = pos - curMatch;
    CLzRef *pair;
    const Byte *cand;
    UInt32 len;

    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
    {
      /* search exhausted: terminate both open links */
      *lowLink = kEmptyHashValue;
      *highLink = kEmptyHashValue;
      return distances;
    }

    pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
    cand = cur - delta;

    /* bytes below min(highLen, lowLen) are already known to be equal */
    len = (highLen < lowLen ? highLen : lowLen);

    if (cand[len] == cur[len])
    {
      do
        len++;
      while (len != lenLimit && cand[len] == cur[len]);

      if (maxLen < len)
      {
        maxLen = len;
        *distances++ = len;
        *distances++ = delta - 1;
        if (len == lenLimit)
        {
          *lowLink = pair[0];
          *highLink = pair[1];
          return distances;
        }
      }
    }

    if (cand[len] < cur[len])
    {
      *lowLink = curMatch;
      lowLink = pair + 1;
      curMatch = *lowLink;
      lowLen = len;
    }
    else
    {
      *highLink = curMatch;
      highLink = pair;
      curMatch = *highLink;
      highLen = len;
    }
  }
}
|
||||
|
||||
/*
  Binary-tree insertion without match reporting.

  Performs the same descent and re-linking as GetMatchesSpec1 (at most
  `cutValue` steps, candidates closer than _cyclicBufferSize), but records
  no distances. As soon as any candidate matches for `lenLimit` bytes, its
  own links are adopted and the function returns.
*/
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
{
  CLzRef *highLink = son + (_cyclicBufferPos << 1) + 1;
  CLzRef *lowLink = son + (_cyclicBufferPos << 1);
  UInt32 highLen = 0;
  UInt32 lowLen = 0;

  for (;;)
  {
    const UInt32 delta = pos - curMatch;
    CLzRef *pair;
    const Byte *cand;
    UInt32 len;

    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
    {
      /* search exhausted: terminate both open links */
      *lowLink = kEmptyHashValue;
      *highLink = kEmptyHashValue;
      return;
    }

    pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
    cand = cur - delta;

    /* bytes below min(highLen, lowLen) are already known to be equal */
    len = (highLen < lowLen ? highLen : lowLen);

    if (cand[len] == cur[len])
    {
      do
        len++;
      while (len != lenLimit && cand[len] == cur[len]);

      if (len == lenLimit)
      {
        *lowLink = pair[0];
        *highLink = pair[1];
        return;
      }
    }

    if (cand[len] < cur[len])
    {
      *lowLink = curMatch;
      lowLink = pair + 1;
      curMatch = *lowLink;
      lowLen = len;
    }
    else
    {
      *highLink = curMatch;
      highLink = pair;
      curMatch = *highLink;
      highLen = len;
    }
  }
}
|
||||
|
||||
#define MOVE_POS \
|
||||
++p->cyclicBufferPos; \
|
||||
p->buffer++; \
|
||||
if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
|
||||
|
||||
#define MOVE_POS_RET MOVE_POS return offset;
|
||||
|
||||
static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
|
||||
|
||||
#define GET_MATCHES_HEADER2(minLen, ret_op) \
|
||||
UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
|
||||
lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
|
||||
cur = p->buffer;
|
||||
|
||||
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
|
||||
#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
|
||||
|
||||
#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
|
||||
|
||||
#define GET_MATCHES_FOOTER(offset, maxLen) \
|
||||
offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
|
||||
distances + offset, maxLen) - distances); MOVE_POS_RET;
|
||||
|
||||
#define SKIP_FOOTER \
|
||||
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
|
||||
|
||||
/* Binary-tree match finder using the hash computed by HASH2_CALC (defined
   elsewhere; presumably a 2-byte hash -- confirm in LzHash.h).
   Records the current position in the hash table, searches the tree with an
   initial best length of 1 and returns the number of UInt32 values written
   to `distances` (0 when fewer than 2 bytes are usable). */
static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  UInt32 offset = 0;
  GET_MATCHES_HEADER(2)
  HASH2_CALC;

  /* swap the table entry: old head is the search start, new head is pos */
  curMatch = p->hash[hashValue];
  p->hash[hashValue] = p->pos;

  GET_MATCHES_FOOTER(offset, 1)
}
|
||||
|
||||
/* Binary-tree match finder using the hash computed by HASH_ZIP_CALC
   (defined elsewhere). Records the current position in the hash table,
   searches the tree with an initial best length of 2 and returns the number
   of UInt32 values written to `distances` (0 when fewer than 3 bytes are
   usable). */
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  UInt32 offset = 0;
  GET_MATCHES_HEADER(3)
  HASH_ZIP_CALC;

  /* swap the table entry: old head is the search start, new head is pos */
  curMatch = p->hash[hashValue];
  p->hash[hashValue] = p->pos;

  GET_MATCHES_FOOTER(offset, 2)
}
|
||||
|
||||
/* Binary-tree match finder with an auxiliary 2-byte hash table.
   HASH3_CALC (defined elsewhere) fills hash2Value and hashValue. The
   2-byte table is probed first; a hit is extended and stored as the first
   (length, distance - 1) pair. If that match already reaches lenLimit the
   tree is only updated (SkipMatchesSpec); otherwise the tree search
   continues from the best length found so far.
   Returns the number of UInt32 values written to `distances`. */
static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  UInt32 hash2Value, dist2, bestLen, offset;
  GET_MATCHES_HEADER(3)

  HASH3_CALC;

  dist2 = p->pos - p->hash[hash2Value];
  curMatch = p->hash[kFix3HashSize + hashValue];

  /* both tables now point at the current position */
  p->hash[kFix3HashSize + hashValue] = p->pos;
  p->hash[hash2Value] = p->pos;

  bestLen = 2;
  offset = 0;
  if (dist2 < p->cyclicBufferSize && *(cur - dist2) == *cur)
  {
    const Byte *ref = cur - dist2;
    while (bestLen != lenLimit && ref[bestLen] == cur[bestLen])
      bestLen++;
    distances[0] = bestLen;
    distances[1] = dist2 - 1;
    offset = 2;
    if (bestLen == lenLimit)
    {
      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
      MOVE_POS_RET;
    }
  }
  GET_MATCHES_FOOTER(offset, bestLen)
}
|
||||
|
||||
/* Binary-tree match finder with auxiliary 2-byte and 3-byte hash tables.
   HASH4_CALC (defined elsewhere) fills hash2Value, hash3Value and
   hashValue. The two small tables are probed first and may contribute up to
   two (length, distance - 1) pairs; the most recent of them is extended as
   far as it matches. If that match reaches lenLimit the tree is only
   updated (SkipMatchesSpec); otherwise the tree search continues with a
   best length of at least 3.
   Returns the number of UInt32 values written to `distances`. */
static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  UInt32 hash2Value, hash3Value, dist2, dist3, bestLen, offset;
  GET_MATCHES_HEADER(4)

  HASH4_CALC;

  dist2 = p->pos - p->hash[                hash2Value];
  dist3 = p->pos - p->hash[kFix3HashSize + hash3Value];
  curMatch = p->hash[kFix4HashSize + hashValue];

  /* all three tables now point at the current position */
  p->hash[kFix4HashSize + hashValue] = p->pos;
  p->hash[kFix3HashSize + hash3Value] = p->pos;
  p->hash[                hash2Value] = p->pos;

  bestLen = 1;
  offset = 0;
  if (dist2 < p->cyclicBufferSize && *(cur - dist2) == *cur)
  {
    bestLen = 2;
    distances[0] = 2;
    distances[1] = dist2 - 1;
    offset = 2;
  }
  if (dist2 != dist3 && dist3 < p->cyclicBufferSize && *(cur - dist3) == *cur)
  {
    bestLen = 3;
    distances[offset + 1] = dist3 - 1;
    offset += 2;
    dist2 = dist3;
  }
  if (offset != 0)
  {
    /* extend the latest candidate and store its final length */
    const Byte *ref = cur - dist2;
    while (bestLen != lenLimit && ref[bestLen] == cur[bestLen])
      bestLen++;
    distances[offset - 2] = bestLen;
    if (bestLen == lenLimit)
    {
      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
      MOVE_POS_RET;
    }
  }
  if (bestLen < 3)
    bestLen = 3;
  GET_MATCHES_FOOTER(offset, bestLen)
}
|
||||
|
||||
/* Hash-chain match finder with auxiliary 2-byte and 3-byte hash tables.
   Same front end as Bt4_MatchFinder_GetMatches, but the main search walks a
   hash chain (Hc_GetMatchesSpec). When a small-table match already reaches
   lenLimit, the current position is only linked into the chain.
   Returns the number of UInt32 values written to `distances`. */
static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  UInt32 hash2Value, hash3Value, dist2, dist3, bestLen, offset;
  GET_MATCHES_HEADER(4)

  HASH4_CALC;

  dist2 = p->pos - p->hash[                hash2Value];
  dist3 = p->pos - p->hash[kFix3HashSize + hash3Value];
  curMatch = p->hash[kFix4HashSize + hashValue];

  /* all three tables now point at the current position */
  p->hash[kFix4HashSize + hashValue] = p->pos;
  p->hash[kFix3HashSize + hash3Value] = p->pos;
  p->hash[                hash2Value] = p->pos;

  bestLen = 1;
  offset = 0;
  if (dist2 < p->cyclicBufferSize && *(cur - dist2) == *cur)
  {
    bestLen = 2;
    distances[0] = 2;
    distances[1] = dist2 - 1;
    offset = 2;
  }
  if (dist2 != dist3 && dist3 < p->cyclicBufferSize && *(cur - dist3) == *cur)
  {
    bestLen = 3;
    distances[offset + 1] = dist3 - 1;
    offset += 2;
    dist2 = dist3;
  }
  if (offset != 0)
  {
    /* extend the latest candidate and store its final length */
    const Byte *ref = cur - dist2;
    while (bestLen != lenLimit && ref[bestLen] == cur[bestLen])
      bestLen++;
    distances[offset - 2] = bestLen;
    if (bestLen == lenLimit)
    {
      p->son[p->cyclicBufferPos] = curMatch;
      MOVE_POS_RET;
    }
  }
  if (bestLen < 3)
    bestLen = 3;
  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
      distances + offset, bestLen) - (distances));
  MOVE_POS_RET
}
|
||||
|
||||
/* Hash-chain match finder using the hash computed by HASH_ZIP_CALC
   (defined elsewhere). Records the current position in the hash table,
   walks the chain with an initial best length of 2 and returns the number
   of UInt32 values written to `distances` (0 when fewer than 3 bytes are
   usable). */
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  UInt32 offset;
  UInt32 *end;
  GET_MATCHES_HEADER(3)
  HASH_ZIP_CALC;

  /* swap the table entry: old head is the search start, new head is pos */
  curMatch = p->hash[hashValue];
  p->hash[hashValue] = p->pos;

  end = Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), distances, 2);
  offset = (UInt32)(end - (distances));
  MOVE_POS_RET
}
|
||||
|
||||
/* Advances `num` positions (num must be at least 1), inserting each
   position into the HASH2_CALC hash table and the binary tree without
   reporting matches. Positions with fewer than 2 usable bytes are only
   stepped over. */
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  do
  {
    SKIP_HEADER(2)
    HASH2_CALC;

    /* swap the table entry: old head is the tree root, new head is pos */
    curMatch = p->hash[hashValue];
    p->hash[hashValue] = p->pos;

    SKIP_FOOTER
  }
  while (--num != 0);
}
|
||||
|
||||
/* Advances `num` positions (num must be at least 1), inserting each
   position into the HASH_ZIP_CALC hash table and the binary tree without
   reporting matches. Positions with fewer than 3 usable bytes are only
   stepped over. */
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  do
  {
    SKIP_HEADER(3)
    HASH_ZIP_CALC;

    /* swap the table entry: old head is the tree root, new head is pos */
    curMatch = p->hash[hashValue];
    p->hash[hashValue] = p->pos;

    SKIP_FOOTER
  }
  while (--num != 0);
}
|
||||
|
||||
/* Advances `num` positions (num must be at least 1), updating the 2-byte
   table, the main table and the binary tree for each position without
   reporting matches. Positions with fewer than 3 usable bytes are only
   stepped over. */
static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  do
  {
    UInt32 hash2Value;
    SKIP_HEADER(3)
    HASH3_CALC;

    curMatch = p->hash[kFix3HashSize + hashValue];

    /* both tables now point at the current position */
    p->hash[kFix3HashSize + hashValue] = p->pos;
    p->hash[hash2Value] = p->pos;

    SKIP_FOOTER
  }
  while (--num != 0);
}
|
||||
|
||||
/* Advances `num` positions (num must be at least 1), updating the 2-byte,
   3-byte and main tables and the binary tree for each position without
   reporting matches. Positions with fewer than 4 usable bytes are only
   stepped over. */
static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  do
  {
    UInt32 hash2Value, hash3Value;
    SKIP_HEADER(4)
    HASH4_CALC;

    curMatch = p->hash[kFix4HashSize + hashValue];

    /* all three tables now point at the current position */
    p->hash[kFix3HashSize + hash3Value] = p->pos;
    p->hash[                hash2Value] = p->pos;
    p->hash[kFix4HashSize + hashValue] = p->pos;

    SKIP_FOOTER
  }
  while (--num != 0);
}
|
||||
|
||||
/* Advances `num` positions (num must be at least 1), updating the 2-byte,
   3-byte and main tables and linking each position into the hash chain
   without reporting matches. Positions with fewer than 4 usable bytes are
   only stepped over. */
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  do
  {
    UInt32 hash2Value, hash3Value;
    SKIP_HEADER(4)
    HASH4_CALC;

    curMatch = p->hash[kFix4HashSize + hashValue];

    /* all three tables now point at the current position */
    p->hash[kFix4HashSize + hashValue] = p->pos;
    p->hash[kFix3HashSize + hash3Value] = p->pos;
    p->hash[                hash2Value] = p->pos;

    /* chain link: current position -> previous head */
    p->son[p->cyclicBufferPos] = curMatch;
    MOVE_POS
  }
  while (--num != 0);
}
|
||||
|
||||
/* Advances `num` positions (num must be at least 1), updating the
   HASH_ZIP_CALC hash table and linking each position into the hash chain
   without reporting matches. Positions with fewer than 3 usable bytes are
   only stepped over. */
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  do
  {
    SKIP_HEADER(3)
    HASH_ZIP_CALC;

    /* swap the table entry: old head is linked behind the new one */
    curMatch = p->hash[hashValue];
    p->hash[hashValue] = p->pos;

    /* chain link: current position -> previous head */
    p->son[p->cyclicBufferPos] = curMatch;
    MOVE_POS
  }
  while (--num != 0);
}
|
||||
|
||||
/* Fills `vTable` with the generic accessors and with the GetMatches/Skip
   pair that matches the configuration of `p`:
     btMode == 0                 -> Hc4
     btMode != 0, 2 hash bytes   -> Bt2
     btMode != 0, 3 hash bytes   -> Bt3
     btMode != 0, anything else  -> Bt4 */
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
{
  Mf_GetMatches_Func getMatches;
  Mf_Skip_Func skip;

  if (!p->btMode)
  {
    getMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
    skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
  }
  else
  {
    switch (p->numHashBytes)
    {
      case 2:
        getMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
        skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
        break;
      case 3:
        getMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
        skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
        break;
      default:
        getMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
        skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
        break;
    }
  }

  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
  vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
  vTable->GetMatches = getMatches;
  vTable->Skip = skip;
}
|
||||
107
vendor/easylzma/src/pavlov/LzFind.h
vendored
107
vendor/easylzma/src/pavlov/LzFind.h
vendored
|
|
@ -1,107 +0,0 @@
|
|||
/* LzFind.h -- Match finder for LZ algorithms
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZFIND_H
|
||||
#define __LZFIND_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
typedef UInt32 CLzRef;
|
||||
|
||||
typedef struct _CMatchFinder
|
||||
{
|
||||
Byte *buffer;
|
||||
UInt32 pos;
|
||||
UInt32 posLimit;
|
||||
UInt32 streamPos;
|
||||
UInt32 lenLimit;
|
||||
|
||||
UInt32 cyclicBufferPos;
|
||||
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
|
||||
|
||||
UInt32 matchMaxLen;
|
||||
CLzRef *hash;
|
||||
CLzRef *son;
|
||||
UInt32 hashMask;
|
||||
UInt32 cutValue;
|
||||
|
||||
Byte *bufferBase;
|
||||
ISeqInStream *stream;
|
||||
int streamEndWasReached;
|
||||
|
||||
UInt32 blockSize;
|
||||
UInt32 keepSizeBefore;
|
||||
UInt32 keepSizeAfter;
|
||||
|
||||
UInt32 numHashBytes;
|
||||
int directInput;
|
||||
int btMode;
|
||||
/* int skipModeBits; */
|
||||
int bigHash;
|
||||
UInt32 historySize;
|
||||
UInt32 fixedHashSize;
|
||||
UInt32 hashSizeSum;
|
||||
UInt32 numSons;
|
||||
SRes result;
|
||||
UInt32 crc[256];
|
||||
} CMatchFinder;
|
||||
|
||||
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
|
||||
#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)])
|
||||
|
||||
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
|
||||
|
||||
int MatchFinder_NeedMove(CMatchFinder *p);
|
||||
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
|
||||
void MatchFinder_MoveBlock(CMatchFinder *p);
|
||||
void MatchFinder_ReadIfRequired(CMatchFinder *p);
|
||||
|
||||
void MatchFinder_Construct(CMatchFinder *p);
|
||||
|
||||
/* Conditions:
|
||||
historySize <= 3 GB
|
||||
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
|
||||
*/
|
||||
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
|
||||
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
|
||||
ISzAlloc *alloc);
|
||||
void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc);
|
||||
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems);
|
||||
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
|
||||
|
||||
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
|
||||
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
|
||||
UInt32 *distances, UInt32 maxLen);
|
||||
|
||||
/*
|
||||
Conditions:
|
||||
Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
|
||||
Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
|
||||
*/
|
||||
|
||||
typedef void (*Mf_Init_Func)(void *object);
|
||||
typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index);
|
||||
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
|
||||
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
|
||||
typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
|
||||
typedef void (*Mf_Skip_Func)(void *object, UInt32);
|
||||
|
||||
typedef struct _IMatchFinder
|
||||
{
|
||||
Mf_Init_Func Init;
|
||||
Mf_GetIndexByte_Func GetIndexByte;
|
||||
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
|
||||
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
|
||||
Mf_GetMatches_Func GetMatches;
|
||||
Mf_Skip_Func Skip;
|
||||
} IMatchFinder;
|
||||
|
||||
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
|
||||
|
||||
void MatchFinder_Init(CMatchFinder *p);
|
||||
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
|
||||
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
|
||||
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
|
||||
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
|
||||
|
||||
#endif
|
||||
54
vendor/easylzma/src/pavlov/LzHash.h
vendored
54
vendor/easylzma/src/pavlov/LzHash.h
vendored
|
|
@ -1,54 +0,0 @@
|
|||
/* LzHash.h -- HASH functions for LZ algorithms
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZHASH_H
|
||||
#define __LZHASH_H
|
||||
|
||||
#define kHash2Size (1 << 10)
|
||||
#define kHash3Size (1 << 16)
|
||||
#define kHash4Size (1 << 20)
|
||||
|
||||
#define kFix3HashSize (kHash2Size)
|
||||
#define kFix4HashSize (kHash2Size + kHash3Size)
|
||||
#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
|
||||
|
||||
#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8);
|
||||
|
||||
#define HASH3_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
|
||||
|
||||
#define HASH4_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
|
||||
hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
|
||||
|
||||
#define HASH5_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
|
||||
hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \
|
||||
hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \
|
||||
hash4Value &= (kHash4Size - 1); }
|
||||
|
||||
/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
|
||||
#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
|
||||
|
||||
|
||||
#define MT_HASH2_CALC \
|
||||
hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
|
||||
|
||||
#define MT_HASH3_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
|
||||
|
||||
#define MT_HASH4_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
|
||||
hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
|
||||
|
||||
#endif
|
||||
1007
vendor/easylzma/src/pavlov/LzmaDec.c
vendored
1007
vendor/easylzma/src/pavlov/LzmaDec.c
vendored
File diff suppressed because it is too large
Load diff
223
vendor/easylzma/src/pavlov/LzmaDec.h
vendored
223
vendor/easylzma/src/pavlov/LzmaDec.h
vendored
|
|
@ -1,223 +0,0 @@
|
|||
/* LzmaDec.h -- LZMA Decoder
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMADEC_H
|
||||
#define __LZMADEC_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
/* #define _LZMA_PROB32 */
|
||||
/* _LZMA_PROB32 can increase the speed on some CPUs,
|
||||
but memory usage for CLzmaDec::probs will be doubled in that case */
|
||||
|
||||
#ifdef _LZMA_PROB32
|
||||
#define CLzmaProb UInt32
|
||||
#else
|
||||
#define CLzmaProb UInt16
|
||||
#endif
|
||||
|
||||
|
||||
/* ---------- LZMA Properties ---------- */
|
||||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
typedef struct _CLzmaProps
|
||||
{
|
||||
unsigned lc, lp, pb;
|
||||
UInt32 dicSize;
|
||||
} CLzmaProps;
|
||||
|
||||
/* LzmaProps_Decode - decodes properties
|
||||
Returns:
|
||||
SZ_OK
|
||||
SZ_ERROR_UNSUPPORTED - Unsupported properties
|
||||
*/
|
||||
|
||||
SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
|
||||
|
||||
|
||||
/* ---------- LZMA Decoder state ---------- */
|
||||
|
||||
/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
|
||||
Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
|
||||
|
||||
#define LZMA_REQUIRED_INPUT_MAX 20
|
||||
|
||||
typedef struct
|
||||
{
|
||||
CLzmaProps prop;
|
||||
CLzmaProb *probs;
|
||||
Byte *dic;
|
||||
const Byte *buf;
|
||||
UInt32 range, code;
|
||||
SizeT dicPos;
|
||||
SizeT dicBufSize;
|
||||
UInt32 processedPos;
|
||||
UInt32 checkDicSize;
|
||||
unsigned state;
|
||||
UInt32 reps[4];
|
||||
unsigned remainLen;
|
||||
int needFlush;
|
||||
int needInitState;
|
||||
UInt32 numProbs;
|
||||
unsigned tempBufSize;
|
||||
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
|
||||
} CLzmaDec;
|
||||
|
||||
#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
|
||||
|
||||
void LzmaDec_Init(CLzmaDec *p);
|
||||
|
||||
/* There are two types of LZMA streams:
|
||||
0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
|
||||
1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
LZMA_FINISH_ANY, /* finish at any point */
|
||||
LZMA_FINISH_END /* block must be finished at the end */
|
||||
} ELzmaFinishMode;
|
||||
|
||||
/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
|
||||
|
||||
You must use LZMA_FINISH_END, when you know that current output buffer
|
||||
covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
|
||||
|
||||
If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
|
||||
and output value of destLen will be less than output buffer size limit.
|
||||
You can check status result also.
|
||||
|
||||
You can use multiple checks to test data integrity after full decompression:
|
||||
1) Check Result and "status" variable.
|
||||
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
|
||||
3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
|
||||
You must use correct finish mode in that case. */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
|
||||
LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
|
||||
LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
|
||||
LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
|
||||
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
|
||||
} ELzmaStatus;
|
||||
|
||||
/* ELzmaStatus is used only as output value for function call */
|
||||
|
||||
|
||||
/* ---------- Interfaces ---------- */
|
||||
|
||||
/* There are 3 levels of interfaces:
|
||||
1) Dictionary Interface
|
||||
2) Buffer Interface
|
||||
3) One Call Interface
|
||||
You can select any of these interfaces, but don't mix functions from different
|
||||
groups for same object. */
|
||||
|
||||
|
||||
/* There are two variants to allocate state for Dictionary Interface:
|
||||
1) LzmaDec_Allocate / LzmaDec_Free
|
||||
2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
|
||||
You can use variant 2, if you set dictionary buffer manually.
|
||||
For Buffer Interface you must always use variant 1.
|
||||
|
||||
LzmaDec_Allocate* can return:
|
||||
SZ_OK
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_UNSUPPORTED - Unsupported properties
|
||||
*/
|
||||
|
||||
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
|
||||
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
|
||||
|
||||
SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
|
||||
void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
|
||||
|
||||
/* ---------- Dictionary Interface ---------- */
|
||||
|
||||
/* You can use it, if you want to eliminate the overhead for data copying from
|
||||
dictionary to some other external buffer.
|
||||
You must work with CLzmaDec variables directly in this interface.
|
||||
|
||||
STEPS:
|
||||
LzmaDec_Construct()
|
||||
LzmaDec_Allocate()
|
||||
for (each new stream)
|
||||
{
|
||||
LzmaDec_Init()
|
||||
while (it needs more decompression)
|
||||
{
|
||||
LzmaDec_DecodeToDic()
|
||||
use data from CLzmaDec::dic and update CLzmaDec::dicPos
|
||||
}
|
||||
}
|
||||
LzmaDec_Free()
|
||||
*/
|
||||
|
||||
/* LzmaDec_DecodeToDic
|
||||
|
||||
The decoding to internal dictionary buffer (CLzmaDec::dic).
|
||||
You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
|
||||
|
||||
finishMode:
|
||||
It has meaning only if the decoding reaches output limit (dicLimit).
|
||||
LZMA_FINISH_ANY - Decode just dicLimit bytes.
|
||||
LZMA_FINISH_END - Stream must be finished after dicLimit.
|
||||
|
||||
Returns:
|
||||
SZ_OK
|
||||
status:
|
||||
LZMA_STATUS_FINISHED_WITH_MARK
|
||||
LZMA_STATUS_NOT_FINISHED
|
||||
LZMA_STATUS_NEEDS_MORE_INPUT
|
||||
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
|
||||
SZ_ERROR_DATA - Data error
|
||||
*/
|
||||
|
||||
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
|
||||
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
|
||||
|
||||
|
||||
/* ---------- Buffer Interface ---------- */
|
||||
|
||||
/* It's zlib-like interface.
|
||||
See LzmaDec_DecodeToDic description for information about STEPS and return results,
|
||||
but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
|
||||
to work with CLzmaDec variables manually.
|
||||
|
||||
finishMode:
|
||||
It has meaning only if the decoding reaches output limit (*destLen).
|
||||
LZMA_FINISH_ANY - Decode just destLen bytes.
|
||||
LZMA_FINISH_END - Stream must be finished after (*destLen).
|
||||
*/
|
||||
|
||||
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
|
||||
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
|
||||
|
||||
|
||||
/* ---------- One Call Interface ---------- */
|
||||
|
||||
/* LzmaDecode
|
||||
|
||||
finishMode:
|
||||
It has meaning only if the decoding reaches output limit (*destLen).
|
||||
LZMA_FINISH_ANY - Decode just destLen bytes.
|
||||
LZMA_FINISH_END - Stream must be finished after (*destLen).
|
||||
|
||||
Returns:
|
||||
SZ_OK
|
||||
status:
|
||||
LZMA_STATUS_FINISHED_WITH_MARK
|
||||
LZMA_STATUS_NOT_FINISHED
|
||||
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
|
||||
SZ_ERROR_DATA - Data error
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_UNSUPPORTED - Unsupported properties
|
||||
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
|
||||
*/
|
||||
|
||||
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
|
||||
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
|
||||
ELzmaStatus *status, ISzAlloc *alloc);
|
||||
|
||||
#endif
|
||||
2275
vendor/easylzma/src/pavlov/LzmaEnc.c
vendored
2275
vendor/easylzma/src/pavlov/LzmaEnc.c
vendored
File diff suppressed because it is too large
Load diff
72
vendor/easylzma/src/pavlov/LzmaEnc.h
vendored
72
vendor/easylzma/src/pavlov/LzmaEnc.h
vendored
|
|
@ -1,72 +0,0 @@
|
|||
/* LzmaEnc.h -- LZMA Encoder
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMAENC_H
|
||||
#define __LZMAENC_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
typedef struct _CLzmaEncProps
|
||||
{
|
||||
int level; /* 0 <= level <= 9 */
|
||||
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
|
||||
(1 << 12) <= dictSize <= (1 << 30) for 64-bit version
|
||||
default = (1 << 24) */
|
||||
int lc; /* 0 <= lc <= 8, default = 3 */
|
||||
int lp; /* 0 <= lp <= 4, default = 0 */
|
||||
int pb; /* 0 <= pb <= 4, default = 2 */
|
||||
int algo; /* 0 - fast, 1 - normal, default = 1 */
|
||||
int fb; /* 5 <= fb <= 273, default = 32 */
|
||||
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
|
||||
int numHashBytes; /* 2, 3 or 4, default = 4 */
|
||||
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
|
||||
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
|
||||
int numThreads; /* 1 or 2, default = 2 */
|
||||
} CLzmaEncProps;
|
||||
|
||||
void LzmaEncProps_Init(CLzmaEncProps *p);
|
||||
void LzmaEncProps_Normalize(CLzmaEncProps *p);
|
||||
UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
|
||||
|
||||
|
||||
/* ---------- CLzmaEncHandle Interface ---------- */
|
||||
|
||||
/* LzmaEnc_* functions can return the following exit codes:
|
||||
Returns:
|
||||
SZ_OK - OK
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_PARAM - Incorrect parameter in props
|
||||
SZ_ERROR_WRITE - Write callback error.
|
||||
SZ_ERROR_PROGRESS - some break from progress callback
|
||||
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
typedef void * CLzmaEncHandle;
|
||||
|
||||
CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc);
|
||||
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
|
||||
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
|
||||
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
|
||||
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
|
||||
/* ---------- One Call Interface ---------- */
|
||||
|
||||
/* LzmaEncode
|
||||
Return code:
|
||||
SZ_OK - OK
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_PARAM - Incorrect parameter
|
||||
SZ_ERROR_OUTPUT_EOF - output buffer overflow
|
||||
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
|
||||
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
|
||||
#endif
|
||||
46
vendor/easylzma/src/pavlov/LzmaLib.c
vendored
46
vendor/easylzma/src/pavlov/LzmaLib.c
vendored
|
|
@ -1,46 +0,0 @@
|
|||
/* LzmaLib.c -- LZMA library wrapper
|
||||
2008-08-05
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
|
||||
#include "LzmaEnc.h"
|
||||
#include "LzmaDec.h"
|
||||
#include "Alloc.h"
|
||||
#include "LzmaLib.h"
|
||||
|
||||
static void *SzAlloc(void *p, size_t size) { (void)p; return MyAlloc(size); }
|
||||
static void SzFree(void *p, void *address) { (void)p; MyFree(address); }
|
||||
static ISzAlloc g_Alloc = { SzAlloc, SzFree };
|
||||
|
||||
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
unsigned char *outProps, size_t *outPropsSize,
|
||||
int level, /* 0 <= level <= 9, default = 5 */
|
||||
unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
|
||||
int lc, /* 0 <= lc <= 8, default = 3 */
|
||||
int lp, /* 0 <= lp <= 4, default = 0 */
|
||||
int pb, /* 0 <= pb <= 4, default = 2 */
|
||||
int fb, /* 5 <= fb <= 273, default = 32 */
|
||||
int numThreads /* 1 or 2, default = 2 */
|
||||
)
|
||||
{
|
||||
CLzmaEncProps props;
|
||||
LzmaEncProps_Init(&props);
|
||||
props.level = level;
|
||||
props.dictSize = dictSize;
|
||||
props.lc = lc;
|
||||
props.lp = lp;
|
||||
props.pb = pb;
|
||||
props.fb = fb;
|
||||
props.numThreads = numThreads;
|
||||
|
||||
return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,
|
||||
NULL, &g_Alloc, &g_Alloc);
|
||||
}
|
||||
|
||||
|
||||
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
|
||||
const unsigned char *props, size_t propsSize)
|
||||
{
|
||||
ELzmaStatus status;
|
||||
return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);
|
||||
}
|
||||
135
vendor/easylzma/src/pavlov/LzmaLib.h
vendored
135
vendor/easylzma/src/pavlov/LzmaLib.h
vendored
|
|
@ -1,135 +0,0 @@
|
|||
/* LzmaLib.h -- LZMA library interface
|
||||
2008-08-05
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
|
||||
#ifndef __LZMALIB_H
|
||||
#define __LZMALIB_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define MY_EXTERN_C extern "C"
|
||||
#else
|
||||
#define MY_EXTERN_C extern
|
||||
#endif
|
||||
|
||||
#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL
|
||||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
/*
|
||||
RAM requirements for LZMA:
|
||||
for compression: (dictSize * 11.5 + 6 MB) + state_size
|
||||
for decompression: dictSize + state_size
|
||||
state_size = (4 + (1.5 << (lc + lp))) KB
|
||||
by default (lc=3, lp=0), state_size = 16 KB.
|
||||
|
||||
LZMA properties (5 bytes) format
|
||||
Offset Size Description
|
||||
0 1 lc, lp and pb in encoded form.
|
||||
1 4 dictSize (little endian).
|
||||
*/
|
||||
|
||||
/*
|
||||
LzmaCompress
|
||||
------------
|
||||
|
||||
outPropsSize -
|
||||
In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
|
||||
Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
|
||||
|
||||
LZMA Encoder will use default values for any parameter, if it is
|
||||
-1 for any of: level, lc, lp, pb, fb, numThreads
|
||||
0 for dictSize
|
||||
|
||||
level - compression level: 0 <= level <= 9;
|
||||
|
||||
level dictSize algo fb
|
||||
0: 16 KB 0 32
|
||||
1: 64 KB 0 32
|
||||
2: 256 KB 0 32
|
||||
3: 1 MB 0 32
|
||||
4: 4 MB 0 32
|
||||
5: 16 MB 1 32
|
||||
6: 32 MB 1 32
|
||||
7+: 64 MB 1 64
|
||||
|
||||
The default value for "level" is 5.
|
||||
|
||||
algo = 0 means fast method
|
||||
algo = 1 means normal method
|
||||
|
||||
dictSize - The dictionary size in bytes. The maximum value is
|
||||
128 MB = (1 << 27) bytes for 32-bit version
|
||||
1 GB = (1 << 30) bytes for 64-bit version
|
||||
The default value is 16 MB = (1 << 24) bytes.
|
||||
It's recommended to use the dictionary that is larger than 4 KB and
|
||||
that can be calculated as (1 << N) or (3 << N) sizes.
|
||||
|
||||
lc - The number of literal context bits (high bits of previous literal).
|
||||
It can be in the range from 0 to 8. The default value is 3.
|
||||
Sometimes lc=4 gives the gain for big files.
|
||||
|
||||
lp - The number of literal pos bits (low bits of current position for literals).
|
||||
It can be in the range from 0 to 4. The default value is 0.
|
||||
The lp switch is intended for periodical data when the period is equal to 2^lp.
|
||||
For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
|
||||
better to set lc=0, if you change lp switch.
|
||||
|
||||
pb - The number of pos bits (low bits of current position).
|
||||
It can be in the range from 0 to 4. The default value is 2.
|
||||
The pb switch is intended for periodical data when the period is equal to 2^pb.
|
||||
|
||||
fb - Word size (the number of fast bytes).
|
||||
It can be in the range from 5 to 273. The default value is 32.
|
||||
Usually, a big number gives a little bit better compression ratio and
|
||||
slower compression process.
|
||||
|
||||
numThreads - The number of threads. 1 or 2. The default value is 2.
|
||||
Fast mode (algo = 0) can use only 1 thread.
|
||||
|
||||
Out:
|
||||
destLen - processed output size
|
||||
Returns:
|
||||
SZ_OK - OK
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_PARAM - Incorrect parameter
|
||||
SZ_ERROR_OUTPUT_EOF - output buffer overflow
|
||||
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
|
||||
int level, /* 0 <= level <= 9, default = 5 */
|
||||
unsigned dictSize, /* default = (1 << 24) */
|
||||
int lc, /* 0 <= lc <= 8, default = 3 */
|
||||
int lp, /* 0 <= lp <= 4, default = 0 */
|
||||
int pb, /* 0 <= pb <= 4, default = 2 */
|
||||
int fb, /* 5 <= fb <= 273, default = 32 */
|
||||
int numThreads /* 1 or 2, default = 2 */
|
||||
);
|
||||
|
||||
/*
|
||||
LzmaUncompress
|
||||
--------------
|
||||
In:
|
||||
dest - output data
|
||||
destLen - output data size
|
||||
src - input data
|
||||
srcLen - input data size
|
||||
Out:
|
||||
destLen - processed output size
|
||||
srcLen - processed input size
|
||||
Returns:
|
||||
SZ_OK - OK
|
||||
SZ_ERROR_DATA - Data error
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_UNSUPPORTED - Unsupported properties
|
||||
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
|
||||
*/
|
||||
|
||||
/* srcLen is declared as size_t * so that this prototype matches the
   definition in LzmaLib.c and the other size parameters of this interface.
   (With SizeT it only matched when SizeT happened to be size_t, i.e. when
   _LZMA_NO_SYSTEM_SIZE_T is not defined.) */
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
  const unsigned char *props, size_t propsSize);
|
||||
|
||||
#endif
|
||||
208
vendor/easylzma/src/pavlov/Types.h
vendored
208
vendor/easylzma/src/pavlov/Types.h
vendored
|
|
@ -1,208 +0,0 @@
|
|||
/* Types.h -- Basic types
|
||||
2008-11-23 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_TYPES_H
|
||||
#define __7Z_TYPES_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#define SZ_OK 0
|
||||
|
||||
#define SZ_ERROR_DATA 1
|
||||
#define SZ_ERROR_MEM 2
|
||||
#define SZ_ERROR_CRC 3
|
||||
#define SZ_ERROR_UNSUPPORTED 4
|
||||
#define SZ_ERROR_PARAM 5
|
||||
#define SZ_ERROR_INPUT_EOF 6
|
||||
#define SZ_ERROR_OUTPUT_EOF 7
|
||||
#define SZ_ERROR_READ 8
|
||||
#define SZ_ERROR_WRITE 9
|
||||
#define SZ_ERROR_PROGRESS 10
|
||||
#define SZ_ERROR_FAIL 11
|
||||
#define SZ_ERROR_THREAD 12
|
||||
|
||||
#define SZ_ERROR_ARCHIVE 16
|
||||
#define SZ_ERROR_NO_ARCHIVE 17
|
||||
|
||||
typedef int SRes;
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef DWORD WRes;
|
||||
#else
|
||||
typedef int WRes;
|
||||
#endif
|
||||
|
||||
#ifndef RINOK
|
||||
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
|
||||
#endif
|
||||
|
||||
typedef unsigned char Byte;
|
||||
typedef short Int16;
|
||||
typedef unsigned short UInt16;
|
||||
|
||||
#ifdef _LZMA_UINT32_IS_ULONG
|
||||
typedef long Int32;
|
||||
typedef unsigned long UInt32;
|
||||
#else
|
||||
typedef int Int32;
|
||||
typedef unsigned int UInt32;
|
||||
#endif
|
||||
|
||||
#ifdef _SZ_NO_INT_64
|
||||
|
||||
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
|
||||
NOTES: Some code will work incorrectly in that case! */
|
||||
|
||||
typedef long Int64;
|
||||
typedef unsigned long UInt64;
|
||||
|
||||
#else
|
||||
|
||||
#if defined(_MSC_VER) || defined(__BORLANDC__)
|
||||
typedef __int64 Int64;
|
||||
typedef unsigned __int64 UInt64;
|
||||
#else
|
||||
typedef long long int Int64;
|
||||
typedef unsigned long long int UInt64;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _LZMA_NO_SYSTEM_SIZE_T
|
||||
typedef UInt32 SizeT;
|
||||
#else
|
||||
typedef size_t SizeT;
|
||||
#endif
|
||||
|
||||
typedef int Bool;
|
||||
#define True 1
|
||||
#define False 0
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#if _MSC_VER >= 1300
|
||||
#define MY_NO_INLINE __declspec(noinline)
|
||||
#else
|
||||
#define MY_NO_INLINE
|
||||
#endif
|
||||
|
||||
#define MY_CDECL __cdecl
|
||||
#define MY_STD_CALL __stdcall
|
||||
#define MY_FAST_CALL MY_NO_INLINE __fastcall
|
||||
|
||||
#else
|
||||
|
||||
#define MY_CDECL
|
||||
#define MY_STD_CALL
|
||||
#define MY_FAST_CALL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* The following interfaces use first parameter as pointer to structure */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SRes (*Read)(void *p, void *buf, size_t *size);
|
||||
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
|
||||
(output(*size) < input(*size)) is allowed */
|
||||
} ISeqInStream;
|
||||
|
||||
/* it can return SZ_ERROR_INPUT_EOF */
|
||||
SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
|
||||
SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
|
||||
SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
size_t (*Write)(void *p, const void *buf, size_t size);
|
||||
/* Returns: result - the number of actually written bytes.
|
||||
(result < size) means error */
|
||||
} ISeqOutStream;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SZ_SEEK_SET = 0,
|
||||
SZ_SEEK_CUR = 1,
|
||||
SZ_SEEK_END = 2
|
||||
} ESzSeek;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
|
||||
SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
|
||||
} ISeekInStream;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SRes (*Look)(void *p, void **buf, size_t *size);
|
||||
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
|
||||
(output(*size) > input(*size)) is not allowed
|
||||
(output(*size) < input(*size)) is allowed */
|
||||
SRes (*Skip)(void *p, size_t offset);
|
||||
/* offset must be <= output(*size) of Look */
|
||||
|
||||
SRes (*Read)(void *p, void *buf, size_t *size);
|
||||
/* reads directly (without buffer). It's same as ISeqInStream::Read */
|
||||
SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
|
||||
} ILookInStream;
|
||||
|
||||
SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
|
||||
SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
|
||||
|
||||
/* reads via ILookInStream::Read */
|
||||
SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
|
||||
SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
|
||||
|
||||
#define LookToRead_BUF_SIZE (1 << 14)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ILookInStream s;
|
||||
ISeekInStream *realStream;
|
||||
size_t pos;
|
||||
size_t size;
|
||||
Byte buf[LookToRead_BUF_SIZE];
|
||||
} CLookToRead;
|
||||
|
||||
void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
|
||||
void LookToRead_Init(CLookToRead *p);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqInStream s;
|
||||
ILookInStream *realStream;
|
||||
} CSecToLook;
|
||||
|
||||
void SecToLook_CreateVTable(CSecToLook *p);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqInStream s;
|
||||
ILookInStream *realStream;
|
||||
} CSecToRead;
|
||||
|
||||
void SecToRead_CreateVTable(CSecToRead *p);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
|
||||
/* Returns: result. (result != SZ_OK) means break.
|
||||
Value (UInt64)(Int64)-1 for size means unknown value. */
|
||||
} ICompressProgress;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void *(*Alloc)(void *p, size_t size);
|
||||
void (*Free)(void *p, void *address); /* address can be 0 */
|
||||
} ISzAlloc;
|
||||
|
||||
#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
|
||||
#define IAlloc_Free(p, a) (p)->Free((p), a)
|
||||
|
||||
#endif
|
||||
272
vendor/salsa20/ecrypt-config.h
vendored
272
vendor/salsa20/ecrypt-config.h
vendored
|
|
@ -1,272 +0,0 @@
|
|||
/* ecrypt-config.h */
|
||||
|
||||
/* *** Normally, it should not be necessary to edit this file. *** */
|
||||
|
||||
#ifndef ECRYPT_CONFIG
|
||||
#define ECRYPT_CONFIG
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Guess the endianness of the target architecture. */
|
||||
|
||||
/*
|
||||
* The LITTLE endian machines:
|
||||
*/
|
||||
#if defined(__ultrix) /* Older MIPS */
|
||||
#define ECRYPT_LITTLE_ENDIAN
|
||||
#elif defined(__alpha) /* Alpha */
|
||||
#define ECRYPT_LITTLE_ENDIAN
|
||||
#elif defined(i386) /* x86 (gcc) */
|
||||
#define ECRYPT_LITTLE_ENDIAN
|
||||
#elif defined(__i386) /* x86 (gcc) */
|
||||
#define ECRYPT_LITTLE_ENDIAN
|
||||
#elif defined(_M_IX86) /* x86 (MSC, Borland) */
|
||||
#define ECRYPT_LITTLE_ENDIAN
|
||||
#elif defined(_MSC_VER) /* x86 (surely MSC) */
|
||||
#define ECRYPT_LITTLE_ENDIAN
|
||||
#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */
|
||||
#define ECRYPT_LITTLE_ENDIAN
|
||||
|
||||
/*
|
||||
* The BIG endian machines:
|
||||
*/
|
||||
#elif defined(sun) /* Newer Sparc's */
|
||||
#define ECRYPT_BIG_ENDIAN
|
||||
#elif defined(__ppc__) /* PowerPC */
|
||||
#define ECRYPT_BIG_ENDIAN
|
||||
|
||||
/*
|
||||
* Finally machines with UNKNOWN endianness:
|
||||
*/
|
||||
#elif defined (_AIX) /* RS6000 */
|
||||
#define ECRYPT_UNKNOWN
|
||||
#elif defined(__hpux) /* HP-PA */
|
||||
#define ECRYPT_UNKNOWN
|
||||
#elif defined(__aux) /* 68K */
|
||||
#define ECRYPT_UNKNOWN
|
||||
#elif defined(__dgux) /* 88K (but P6 in latest boxes) */
|
||||
#define ECRYPT_UNKNOWN
|
||||
#elif defined(__sgi) /* Newer MIPS */
|
||||
#define ECRYPT_UNKNOWN
|
||||
#else /* Any other processor */
|
||||
#define ECRYPT_UNKNOWN
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Find minimal-width types to store 8-bit, 16-bit, 32-bit, and 64-bit
|
||||
* integers.
|
||||
*
|
||||
* Note: to enable 64-bit types on 32-bit compilers, it might be
|
||||
* necessary to switch from ISO C90 mode to ISO C99 mode (e.g., gcc
|
||||
* -std=c99).
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
/* --- check char --- */
|
||||
|
||||
#if (UCHAR_MAX / 0xFU > 0xFU)
|
||||
#ifndef I8T
|
||||
#define I8T char
|
||||
#define U8C(v) (v##U)
|
||||
|
||||
#if (UCHAR_MAX == 0xFFU)
|
||||
#define ECRYPT_I8T_IS_BYTE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if (UCHAR_MAX / 0xFFU > 0xFFU)
|
||||
#ifndef I16T
|
||||
#define I16T char
|
||||
#define U16C(v) (v##U)
|
||||
#endif
|
||||
|
||||
#if (UCHAR_MAX / 0xFFFFU > 0xFFFFU)
|
||||
#ifndef I32T
|
||||
#define I32T char
|
||||
#define U32C(v) (v##U)
|
||||
#endif
|
||||
|
||||
#if (UCHAR_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
|
||||
#ifndef I64T
|
||||
#define I64T char
|
||||
#define U64C(v) (v##U)
|
||||
#define ECRYPT_NATIVE64
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* --- check short --- */
|
||||
|
||||
#if (USHRT_MAX / 0xFU > 0xFU)
|
||||
#ifndef I8T
|
||||
#define I8T short
|
||||
#define U8C(v) (v##U)
|
||||
|
||||
#if (USHRT_MAX == 0xFFU)
|
||||
#define ECRYPT_I8T_IS_BYTE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if (USHRT_MAX / 0xFFU > 0xFFU)
|
||||
#ifndef I16T
|
||||
#define I16T short
|
||||
#define U16C(v) (v##U)
|
||||
#endif
|
||||
|
||||
#if (USHRT_MAX / 0xFFFFU > 0xFFFFU)
|
||||
#ifndef I32T
|
||||
#define I32T short
|
||||
#define U32C(v) (v##U)
|
||||
#endif
|
||||
|
||||
#if (USHRT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
|
||||
#ifndef I64T
|
||||
#define I64T short
|
||||
#define U64C(v) (v##U)
|
||||
#define ECRYPT_NATIVE64
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* --- check int --- */
|
||||
|
||||
/*
 * Candidate type: int.
 *
 * Each nesting level checks whether unsigned int is wide enough for the next
 * slot (8, 16, 32, 64 bits) and claims that slot (I8T, I16T, I32T, I64T)
 * only when no earlier candidate type has already defined it.
 */
#if (UINT_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T int
#define U8C(v) (v##U)

/*
 * The type selected here is int, so the exact-byte test must look at
 * UINT_MAX (the original tested ULONG_MAX, the range of a different type).
 */
#if (UINT_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif

#endif

#if (UINT_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T int
#define U16C(v) (v##U)
#endif

#if (UINT_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T int
#define U32C(v) (v##U)
#endif

#if (UINT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T int
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif

#endif
#endif
#endif
#endif
|
||||
|
||||
/* --- check long --- */
|
||||
|
||||
#if (ULONG_MAX / 0xFUL > 0xFUL)
|
||||
#ifndef I8T
|
||||
#define I8T long
|
||||
#define U8C(v) (v##UL)
|
||||
|
||||
#if (ULONG_MAX == 0xFFUL)
|
||||
#define ECRYPT_I8T_IS_BYTE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if (ULONG_MAX / 0xFFUL > 0xFFUL)
|
||||
#ifndef I16T
|
||||
#define I16T long
|
||||
#define U16C(v) (v##UL)
|
||||
#endif
|
||||
|
||||
#if (ULONG_MAX / 0xFFFFUL > 0xFFFFUL)
|
||||
#ifndef I32T
|
||||
#define I32T long
|
||||
#define U32C(v) (v##UL)
|
||||
#endif
|
||||
|
||||
#if (ULONG_MAX / 0xFFFFFFFFUL > 0xFFFFFFFFUL)
|
||||
#ifndef I64T
|
||||
#define I64T long
|
||||
#define U64C(v) (v##UL)
|
||||
#define ECRYPT_NATIVE64
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* --- check long long --- */
|
||||
|
||||
#ifdef ULLONG_MAX
|
||||
|
||||
#if (ULLONG_MAX / 0xFULL > 0xFULL)
|
||||
#ifndef I8T
|
||||
#define I8T long long
|
||||
#define U8C(v) (v##ULL)
|
||||
|
||||
#if (ULLONG_MAX == 0xFFULL)
|
||||
#define ECRYPT_I8T_IS_BYTE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if (ULLONG_MAX / 0xFFULL > 0xFFULL)
|
||||
#ifndef I16T
|
||||
#define I16T long long
|
||||
#define U16C(v) (v##ULL)
|
||||
#endif
|
||||
|
||||
#if (ULLONG_MAX / 0xFFFFULL > 0xFFFFULL)
|
||||
#ifndef I32T
|
||||
#define I32T long long
|
||||
#define U32C(v) (v##ULL)
|
||||
#endif
|
||||
|
||||
#if (ULLONG_MAX / 0xFFFFFFFFULL > 0xFFFFFFFFULL)
|
||||
#ifndef I64T
|
||||
#define I64T long long
|
||||
#define U64C(v) (v##ULL)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* --- check __int64 --- */
|
||||
|
||||
#ifdef _UI64_MAX
|
||||
|
||||
#if (_UI64_MAX / 0xFFFFFFFFui64 > 0xFFFFFFFFui64)
|
||||
#ifndef I64T
|
||||
#define I64T __int64
|
||||
#define U64C(v) (v##ui64)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#endif
|
||||
46
vendor/salsa20/ecrypt-machine.h
vendored
46
vendor/salsa20/ecrypt-machine.h
vendored
|
|
@ -1,46 +0,0 @@
|
|||
/* ecrypt-machine.h */
|
||||
|
||||
/*
|
||||
* This file is included by 'ecrypt-portable.h'. It allows to override
|
||||
* the default macros for specific platforms. Please carefully check
|
||||
* the machine code generated by your compiler (with optimisations
|
||||
* turned on) before deciding to edit this file.
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#if (defined(ECRYPT_DEFAULT_ROT) && !defined(ECRYPT_MACHINE_ROT))
|
||||
|
||||
#define ECRYPT_MACHINE_ROT
|
||||
|
||||
#if (defined(WIN32) && defined(_MSC_VER))
|
||||
|
||||
#undef ROTL32
|
||||
#undef ROTR32
|
||||
#undef ROTL64
|
||||
#undef ROTR64
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ROTL32(v, n) _lrotl(v, n)
|
||||
#define ROTR32(v, n) _lrotr(v, n)
|
||||
#define ROTL64(v, n) _rotl64(v, n)
|
||||
#define ROTR64(v, n) _rotr64(v, n)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#if (defined(ECRYPT_DEFAULT_SWAP) && !defined(ECRYPT_MACHINE_SWAP))
|
||||
|
||||
#define ECRYPT_MACHINE_SWAP
|
||||
|
||||
/*
|
||||
* If you want to overwrite the default swap macros, put it here. And so on.
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
303
vendor/salsa20/ecrypt-portable.h
vendored
303
vendor/salsa20/ecrypt-portable.h
vendored
|
|
@ -1,303 +0,0 @@
|
|||
/* ecrypt-portable.h */
|
||||
|
||||
/*
|
||||
* WARNING: the conversions defined below are implemented as macros,
|
||||
* and should be used carefully. They should NOT be used with
|
||||
* parameters which perform some action. E.g., the following two lines
|
||||
* are not equivalent:
|
||||
*
|
||||
* 1) ++x; y = ROTL32(x, n);
|
||||
* 2) y = ROTL32(++x, n);
|
||||
*/
|
||||
|
||||
/*
|
||||
* *** Please do not edit this file. ***
|
||||
*
|
||||
* The default macros can be overridden for specific architectures by
|
||||
* editing 'ecrypt-machine.h'.
|
||||
*/
|
||||
|
||||
#ifndef ECRYPT_PORTABLE
|
||||
#define ECRYPT_PORTABLE
|
||||
|
||||
#include "ecrypt-config.h"
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* The following types are defined (if available):
|
||||
*
|
||||
* u8: unsigned integer type, at least 8 bits
|
||||
* u16: unsigned integer type, at least 16 bits
|
||||
* u32: unsigned integer type, at least 32 bits
|
||||
* u64: unsigned integer type, at least 64 bits
|
||||
*
|
||||
* s8, s16, s32, s64 -> signed counterparts of u8, u16, u32, u64
|
||||
*
|
||||
* The selection of minimum-width integer types is taken care of by
|
||||
* 'ecrypt-config.h'. Note: to enable 64-bit types on 32-bit
|
||||
* compilers, it might be necessary to switch from ISO C90 mode to ISO
|
||||
* C99 mode (e.g., gcc -std=c99).
|
||||
*/
|
||||
|
||||
#ifdef I8T
|
||||
typedef signed I8T s8;
|
||||
typedef unsigned I8T u8;
|
||||
#endif
|
||||
|
||||
#ifdef I16T
|
||||
typedef signed I16T s16;
|
||||
typedef unsigned I16T u16;
|
||||
#endif
|
||||
|
||||
#ifdef I32T
|
||||
typedef signed I32T s32;
|
||||
typedef unsigned I32T u32;
|
||||
#endif
|
||||
|
||||
#ifdef I64T
|
||||
typedef signed I64T s64;
|
||||
typedef unsigned I64T u64;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The following macros are used to obtain exact-width results.
|
||||
*/
|
||||
|
||||
#define U8V(v) ((u8)(v) & U8C(0xFF))
|
||||
#define U16V(v) ((u16)(v) & U16C(0xFFFF))
|
||||
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
|
||||
#define U64V(v) ((u64)(v) & U64C(0xFFFFFFFFFFFFFFFF))
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* The following macros return words with their bits rotated over n
|
||||
* positions to the left/right.
|
||||
*/
|
||||
|
||||
#define ECRYPT_DEFAULT_ROT
|
||||
|
||||
#define ROTL8(v, n) \
|
||||
(U8V((v) << (n)) | ((v) >> (8 - (n))))
|
||||
|
||||
#define ROTL16(v, n) \
|
||||
(U16V((v) << (n)) | ((v) >> (16 - (n))))
|
||||
|
||||
#define ROTL32(v, n) \
|
||||
(U32V((v) << (n)) | ((v) >> (32 - (n))))
|
||||
|
||||
#define ROTL64(v, n) \
|
||||
(U64V((v) << (n)) | ((v) >> (64 - (n))))
|
||||
|
||||
#define ROTR8(v, n) ROTL8(v, 8 - (n))
|
||||
#define ROTR16(v, n) ROTL16(v, 16 - (n))
|
||||
#define ROTR32(v, n) ROTL32(v, 32 - (n))
|
||||
#define ROTR64(v, n) ROTL64(v, 64 - (n))
|
||||
|
||||
#include "ecrypt-machine.h"
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* The following macros return a word with bytes in reverse order.
|
||||
*/
|
||||
|
||||
#define ECRYPT_DEFAULT_SWAP
|
||||
|
||||
#define SWAP16(v) \
|
||||
ROTL16(v, 8)
|
||||
|
||||
#define SWAP32(v) \
|
||||
((ROTL32(v, 8) & U32C(0x00FF00FF)) | \
|
||||
(ROTL32(v, 24) & U32C(0xFF00FF00)))
|
||||
|
||||
/*
 * SWAP64(v): return v with its eight bytes in reverse order.
 *
 * With ECRYPT_NATIVE64 the result is assembled from four rotations of v,
 * each masked to the two byte lanes that the rotation puts in their final
 * position.  Otherwise the two 32-bit halves are byte-swapped with SWAP32
 * and exchanged.
 *
 * As with the other macros in this file, v is evaluated more than once and
 * must not have side effects.
 */
#ifdef ECRYPT_NATIVE64
#define SWAP64(v) \
  ((ROTL64(v,  8) & U64C(0x000000FF000000FF)) | \
   (ROTL64(v, 24) & U64C(0x0000FF000000FF00)) | \
   (ROTL64(v, 40) & U64C(0x00FF000000FF0000)) | \
   (ROTL64(v, 56) & U64C(0xFF000000FF000000)))
#else
/* (v) is parenthesised so that arguments such as `a | b` shift as a whole. */
#define SWAP64(v) \
  (((u64)SWAP32(U32V(v)) << 32) | (u64)SWAP32(U32V((v) >> 32)))
#endif
|
||||
|
||||
#include "ecrypt-machine.h"
|
||||
|
||||
#define ECRYPT_DEFAULT_WTOW
|
||||
|
||||
#ifdef ECRYPT_LITTLE_ENDIAN
|
||||
#define U16TO16_LITTLE(v) (v)
|
||||
#define U32TO32_LITTLE(v) (v)
|
||||
#define U64TO64_LITTLE(v) (v)
|
||||
|
||||
#define U16TO16_BIG(v) SWAP16(v)
|
||||
#define U32TO32_BIG(v) SWAP32(v)
|
||||
#define U64TO64_BIG(v) SWAP64(v)
|
||||
#endif
|
||||
|
||||
#ifdef ECRYPT_BIG_ENDIAN
|
||||
#define U16TO16_LITTLE(v) SWAP16(v)
|
||||
#define U32TO32_LITTLE(v) SWAP32(v)
|
||||
#define U64TO64_LITTLE(v) SWAP64(v)
|
||||
|
||||
#define U16TO16_BIG(v) (v)
|
||||
#define U32TO32_BIG(v) (v)
|
||||
#define U64TO64_BIG(v) (v)
|
||||
#endif
|
||||
|
||||
#include "ecrypt-machine.h"
|
||||
|
||||
/*
|
||||
* The following macros load words from an array of bytes with
|
||||
* different types of endianness, and vice versa.
|
||||
*/
|
||||
|
||||
#define ECRYPT_DEFAULT_BTOW
|
||||
|
||||
#if (!defined(ECRYPT_UNKNOWN) && defined(ECRYPT_I8T_IS_BYTE))
|
||||
|
||||
#define U8TO16_LITTLE(p) U16TO16_LITTLE(((u16*)(p))[0])
|
||||
#define U8TO32_LITTLE(p) U32TO32_LITTLE(((u32*)(p))[0])
|
||||
#define U8TO64_LITTLE(p) U64TO64_LITTLE(((u64*)(p))[0])
|
||||
|
||||
#define U8TO16_BIG(p) U16TO16_BIG(((u16*)(p))[0])
|
||||
#define U8TO32_BIG(p) U32TO32_BIG(((u32*)(p))[0])
|
||||
#define U8TO64_BIG(p) U64TO64_BIG(((u64*)(p))[0])
|
||||
|
||||
#define U16TO8_LITTLE(p, v) (((u16*)(p))[0] = U16TO16_LITTLE(v))
|
||||
#define U32TO8_LITTLE(p, v) (((u32*)(p))[0] = U32TO32_LITTLE(v))
|
||||
#define U64TO8_LITTLE(p, v) (((u64*)(p))[0] = U64TO64_LITTLE(v))
|
||||
|
||||
#define U16TO8_BIG(p, v) (((u16*)(p))[0] = U16TO16_BIG(v))
|
||||
#define U32TO8_BIG(p, v) (((u32*)(p))[0] = U32TO32_BIG(v))
|
||||
#define U64TO8_BIG(p, v) (((u64*)(p))[0] = U64TO64_BIG(v))
|
||||
|
||||
#else
|
||||
|
||||
#define U8TO16_LITTLE(p) \
|
||||
(((u16)((p)[0]) ) | \
|
||||
((u16)((p)[1]) << 8))
|
||||
|
||||
#define U8TO32_LITTLE(p) \
|
||||
(((u32)((p)[0]) ) | \
|
||||
((u32)((p)[1]) << 8) | \
|
||||
((u32)((p)[2]) << 16) | \
|
||||
((u32)((p)[3]) << 24))
|
||||
|
||||
#ifdef ECRYPT_NATIVE64
|
||||
#define U8TO64_LITTLE(p) \
|
||||
(((u64)((p)[0]) ) | \
|
||||
((u64)((p)[1]) << 8) | \
|
||||
((u64)((p)[2]) << 16) | \
|
||||
((u64)((p)[3]) << 24) | \
|
||||
((u64)((p)[4]) << 32) | \
|
||||
((u64)((p)[5]) << 40) | \
|
||||
((u64)((p)[6]) << 48) | \
|
||||
((u64)((p)[7]) << 56))
|
||||
#else
|
||||
#define U8TO64_LITTLE(p) \
|
||||
((u64)U8TO32_LITTLE(p) | ((u64)U8TO32_LITTLE((p) + 4) << 32))
|
||||
#endif
|
||||
|
||||
#define U8TO16_BIG(p) \
|
||||
(((u16)((p)[0]) << 8) | \
|
||||
((u16)((p)[1]) ))
|
||||
|
||||
#define U8TO32_BIG(p) \
|
||||
(((u32)((p)[0]) << 24) | \
|
||||
((u32)((p)[1]) << 16) | \
|
||||
((u32)((p)[2]) << 8) | \
|
||||
((u32)((p)[3]) ))
|
||||
|
||||
#ifdef ECRYPT_NATIVE64
|
||||
#define U8TO64_BIG(p) \
|
||||
(((u64)((p)[0]) << 56) | \
|
||||
((u64)((p)[1]) << 48) | \
|
||||
((u64)((p)[2]) << 40) | \
|
||||
((u64)((p)[3]) << 32) | \
|
||||
((u64)((p)[4]) << 24) | \
|
||||
((u64)((p)[5]) << 16) | \
|
||||
((u64)((p)[6]) << 8) | \
|
||||
((u64)((p)[7]) ))
|
||||
#else
|
||||
#define U8TO64_BIG(p) \
|
||||
(((u64)U8TO32_BIG(p) << 32) | (u64)U8TO32_BIG((p) + 4))
|
||||
#endif
|
||||
|
||||
#define U16TO8_LITTLE(p, v) \
|
||||
do { \
|
||||
(p)[0] = U8V((v) ); \
|
||||
(p)[1] = U8V((v) >> 8); \
|
||||
} while (0)
|
||||
|
||||
#define U32TO8_LITTLE(p, v) \
|
||||
do { \
|
||||
(p)[0] = U8V((v) ); \
|
||||
(p)[1] = U8V((v) >> 8); \
|
||||
(p)[2] = U8V((v) >> 16); \
|
||||
(p)[3] = U8V((v) >> 24); \
|
||||
} while (0)
|
||||
|
||||
#ifdef ECRYPT_NATIVE64
|
||||
#define U64TO8_LITTLE(p, v) \
|
||||
do { \
|
||||
(p)[0] = U8V((v) ); \
|
||||
(p)[1] = U8V((v) >> 8); \
|
||||
(p)[2] = U8V((v) >> 16); \
|
||||
(p)[3] = U8V((v) >> 24); \
|
||||
(p)[4] = U8V((v) >> 32); \
|
||||
(p)[5] = U8V((v) >> 40); \
|
||||
(p)[6] = U8V((v) >> 48); \
|
||||
(p)[7] = U8V((v) >> 56); \
|
||||
} while (0)
|
||||
#else
|
||||
#define U64TO8_LITTLE(p, v) \
|
||||
do { \
|
||||
U32TO8_LITTLE((p), U32V((v) )); \
|
||||
U32TO8_LITTLE((p) + 4, U32V((v) >> 32)); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
/*
 * U16TO8_BIG(p, v): store the 16-bit value v at p[0..1] in big-endian
 * order, i.e. most significant byte first.  This mirrors U32TO8_BIG and
 * U64TO8_BIG; the previous definition wrote the low byte first, which is
 * the little-endian layout.
 */
#define U16TO8_BIG(p, v) \
  do { \
    (p)[0] = U8V((v) >> 8); \
    (p)[1] = U8V((v)     ); \
  } while (0)
|
||||
|
||||
#define U32TO8_BIG(p, v) \
|
||||
do { \
|
||||
(p)[0] = U8V((v) >> 24); \
|
||||
(p)[1] = U8V((v) >> 16); \
|
||||
(p)[2] = U8V((v) >> 8); \
|
||||
(p)[3] = U8V((v) ); \
|
||||
} while (0)
|
||||
|
||||
#ifdef ECRYPT_NATIVE64
|
||||
#define U64TO8_BIG(p, v) \
|
||||
do { \
|
||||
(p)[0] = U8V((v) >> 56); \
|
||||
(p)[1] = U8V((v) >> 48); \
|
||||
(p)[2] = U8V((v) >> 40); \
|
||||
(p)[3] = U8V((v) >> 32); \
|
||||
(p)[4] = U8V((v) >> 24); \
|
||||
(p)[5] = U8V((v) >> 16); \
|
||||
(p)[6] = U8V((v) >> 8); \
|
||||
(p)[7] = U8V((v) ); \
|
||||
} while (0)
|
||||
#else
|
||||
#define U64TO8_BIG(p, v) \
|
||||
do { \
|
||||
U32TO8_BIG((p), U32V((v) >> 32)); \
|
||||
U32TO8_BIG((p) + 4, U32V((v) )); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include "ecrypt-machine.h"
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#endif
|
||||
279
vendor/salsa20/ecrypt-sync.h
vendored
279
vendor/salsa20/ecrypt-sync.h
vendored
|
|
@ -1,279 +0,0 @@
|
|||
/* ecrypt-sync.h */
|
||||
|
||||
/*
|
||||
* Header file for synchronous stream ciphers without authentication
|
||||
* mechanism.
|
||||
*
|
||||
* *** Please only edit parts marked with "[edit]". ***
|
||||
*/
|
||||
|
||||
#ifndef ECRYPT_SYNC
|
||||
#define ECRYPT_SYNC
|
||||
|
||||
#include "ecrypt-portable.h"
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Cipher parameters */
|
||||
|
||||
/*
|
||||
* The name of your cipher.
|
||||
*/
|
||||
#define ECRYPT_NAME "Salsa20" /* [edit] */
|
||||
#define ECRYPT_PROFILE "S!_H."
|
||||
|
||||
/*
|
||||
* Specify which key and IV sizes are supported by your cipher. A user
|
||||
* should be able to enumerate the supported sizes by running the
|
||||
* following code:
|
||||
*
|
||||
* for (i = 0; ECRYPT_KEYSIZE(i) <= ECRYPT_MAXKEYSIZE; ++i)
|
||||
* {
|
||||
* keysize = ECRYPT_KEYSIZE(i);
|
||||
*
|
||||
* ...
|
||||
* }
|
||||
*
|
||||
* All sizes are in bits.
|
||||
*/
|
||||
|
||||
#define ECRYPT_MAXKEYSIZE 256 /* [edit] */
|
||||
#define ECRYPT_KEYSIZE(i) (128 + (i)*128) /* [edit] */
|
||||
|
||||
#define ECRYPT_MAXIVSIZE 64 /* [edit] */
|
||||
#define ECRYPT_IVSIZE(i) (64 + (i)*64) /* [edit] */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Data structures */
|
||||
|
||||
/*
|
||||
* ECRYPT_ctx is the structure containing the representation of the
|
||||
* internal state of your cipher.
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u32 input[16]; /* could be compressed */
|
||||
/*
|
||||
* [edit]
|
||||
*
|
||||
* Put here all state variable needed during the encryption process.
|
||||
*/
|
||||
} ECRYPT_ctx;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Mandatory functions */
|
||||
|
||||
/*
|
||||
* Key and message independent initialization. This function will be
|
||||
* called once when the program starts (e.g., to build expanded S-box
|
||||
* tables).
|
||||
*/
|
||||
/* Declared with (void) so this is a real prototype and matches the definition. */
void ECRYPT_init(void);
|
||||
|
||||
/*
|
||||
* Key setup. It is the user's responsibility to select the values of
|
||||
* keysize and ivsize from the set of supported values specified
|
||||
* above.
|
||||
*/
|
||||
void ECRYPT_keysetup(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* key,
|
||||
u32 keysize, /* Key size in bits. */
|
||||
u32 ivsize); /* IV size in bits. */
|
||||
|
||||
/*
|
||||
* IV setup. After having called ECRYPT_keysetup(), the user is
|
||||
* allowed to call ECRYPT_ivsetup() different times in order to
|
||||
* encrypt/decrypt different messages with the same key but different
|
||||
* IV's.
|
||||
*/
|
||||
void ECRYPT_ivsetup(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* iv);
|
||||
|
||||
/*
|
||||
* Encryption/decryption of arbitrary length messages.
|
||||
*
|
||||
* For efficiency reasons, the API provides two types of
|
||||
* encrypt/decrypt functions. The ECRYPT_encrypt_bytes() function
|
||||
* (declared here) encrypts byte strings of arbitrary length, while
|
||||
* the ECRYPT_encrypt_blocks() function (defined later) only accepts
|
||||
* lengths which are multiples of ECRYPT_BLOCKLENGTH.
|
||||
*
|
||||
* The user is allowed to make multiple calls to
|
||||
* ECRYPT_encrypt_blocks() to incrementally encrypt a long message,
|
||||
* but he is NOT allowed to make additional encryption calls once he
|
||||
* has called ECRYPT_encrypt_bytes() (unless he starts a new message
|
||||
* of course). For example, this sequence of calls is acceptable:
|
||||
*
|
||||
* ECRYPT_keysetup();
|
||||
*
|
||||
* ECRYPT_ivsetup();
|
||||
* ECRYPT_encrypt_blocks();
|
||||
* ECRYPT_encrypt_blocks();
|
||||
* ECRYPT_encrypt_bytes();
|
||||
*
|
||||
* ECRYPT_ivsetup();
|
||||
* ECRYPT_encrypt_blocks();
|
||||
* ECRYPT_encrypt_blocks();
|
||||
*
|
||||
* ECRYPT_ivsetup();
|
||||
* ECRYPT_encrypt_bytes();
|
||||
*
|
||||
* The following sequence is not:
|
||||
*
|
||||
* ECRYPT_keysetup();
|
||||
* ECRYPT_ivsetup();
|
||||
* ECRYPT_encrypt_blocks();
|
||||
* ECRYPT_encrypt_bytes();
|
||||
* ECRYPT_encrypt_blocks();
|
||||
*/
|
||||
|
||||
void ECRYPT_encrypt_bytes(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
u32 msglen); /* Message length in bytes. */
|
||||
|
||||
void ECRYPT_decrypt_bytes(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
u32 msglen); /* Message length in bytes. */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Optional features */
|
||||
|
||||
/*
|
||||
* For testing purposes it can sometimes be useful to have a function
|
||||
* which immediately generates keystream without having to provide it
|
||||
* with a zero plaintext. If your cipher cannot provide this function
|
||||
* (e.g., because it is not strictly a synchronous cipher), please
|
||||
* reset the ECRYPT_GENERATES_KEYSTREAM flag.
|
||||
*/
|
||||
|
||||
#define ECRYPT_GENERATES_KEYSTREAM
|
||||
#ifdef ECRYPT_GENERATES_KEYSTREAM
|
||||
|
||||
void ECRYPT_keystream_bytes(
|
||||
ECRYPT_ctx* ctx,
|
||||
u8* keystream,
|
||||
u32 length); /* Length of keystream in bytes. */
|
||||
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Optional optimizations */
|
||||
|
||||
/*
|
||||
* By default, the functions in this section are implemented using
|
||||
* calls to functions declared above. However, you might want to
|
||||
* implement them differently for performance reasons.
|
||||
*/
|
||||
|
||||
/*
|
||||
* All-in-one encryption/decryption of (short) packets.
|
||||
*
|
||||
* The default definitions of these functions can be found in
|
||||
* "ecrypt-sync.c". If you want to implement them differently, please
|
||||
* undef the ECRYPT_USES_DEFAULT_ALL_IN_ONE flag.
|
||||
*/
|
||||
#define ECRYPT_USES_DEFAULT_ALL_IN_ONE /* [edit] */
|
||||
|
||||
void ECRYPT_encrypt_packet(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* iv,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
u32 msglen);
|
||||
|
||||
void ECRYPT_decrypt_packet(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* iv,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
u32 msglen);
|
||||
|
||||
/*
|
||||
* Encryption/decryption of blocks.
|
||||
*
|
||||
* By default, these functions are defined as macros. If you want to
|
||||
* provide a different implementation, please undef the
|
||||
* ECRYPT_USES_DEFAULT_BLOCK_MACROS flag and implement the functions
|
||||
* declared below.
|
||||
*/
|
||||
|
||||
#define ECRYPT_BLOCKLENGTH 64 /* [edit] */
|
||||
|
||||
#define ECRYPT_USES_DEFAULT_BLOCK_MACROS /* [edit] */
|
||||
#ifdef ECRYPT_USES_DEFAULT_BLOCK_MACROS
|
||||
|
||||
#define ECRYPT_encrypt_blocks(ctx, plaintext, ciphertext, blocks) \
|
||||
ECRYPT_encrypt_bytes(ctx, plaintext, ciphertext, \
|
||||
(blocks) * ECRYPT_BLOCKLENGTH)
|
||||
|
||||
#define ECRYPT_decrypt_blocks(ctx, ciphertext, plaintext, blocks) \
|
||||
ECRYPT_decrypt_bytes(ctx, ciphertext, plaintext, \
|
||||
(blocks) * ECRYPT_BLOCKLENGTH)
|
||||
|
||||
#ifdef ECRYPT_GENERATES_KEYSTREAM
|
||||
|
||||
#define ECRYPT_keystream_blocks(ctx, keystream, blocks) \
|
||||
ECRYPT_keystream_bytes(ctx, keystream, \
|
||||
(blocks) * ECRYPT_BLOCKLENGTH)
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
void ECRYPT_encrypt_blocks(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
u32 blocks); /* Message length in blocks. */
|
||||
|
||||
void ECRYPT_decrypt_blocks(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
u32 blocks); /* Message length in blocks. */
|
||||
|
||||
#ifdef ECRYPT_GENERATES_KEYSTREAM
|
||||
|
||||
void ECRYPT_keystream_blocks(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* keystream,
|
||||
u32 blocks); /* Keystream length in blocks. */
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If your cipher can be implemented in different ways, you can use
|
||||
* the ECRYPT_VARIANT parameter to allow the user to choose between
|
||||
* them at compile time (e.g., gcc -DECRYPT_VARIANT=3 ...). Please
|
||||
* only use this possibility if you really think it could make a
|
||||
* significant difference and keep the number of variants
|
||||
* (ECRYPT_MAXVARIANT) as small as possible (definitely not more than
|
||||
* 10). Note also that all variants should have exactly the same
|
||||
* external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.).
|
||||
*/
|
||||
#define ECRYPT_MAXVARIANT 1 /* [edit] */
|
||||
|
||||
#ifndef ECRYPT_VARIANT
|
||||
#define ECRYPT_VARIANT 1
|
||||
#endif
|
||||
|
||||
#if (ECRYPT_VARIANT > ECRYPT_MAXVARIANT)
|
||||
#error this variant does not exist
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#endif
|
||||
127
vendor/salsa20/ecrypt.c
vendored
127
vendor/salsa20/ecrypt.c
vendored
|
|
@ -1,127 +0,0 @@
|
|||
#include "ecrypt-sync.h"
|
||||
|
||||
#define ROTATE(v,c) (ROTL32(v,c))
|
||||
#define XOR(v,w) ((v) ^ (w))
|
||||
#define PLUS(v,w) (U32V((v) + (w)))
|
||||
#define PLUSONE(v) (PLUS((v),1))
|
||||
|
||||
/*
 * One Salsa20 quarter-round on four words of the working state x[].
 * The statement order matters: each step reads the word written by the
 * previous one.
 */
#define SALSA20_QUARTERROUND(a,b,c,d) \
  do { \
    x[b] = XOR(x[b],ROTATE(PLUS(x[a],x[d]), 7)); \
    x[c] = XOR(x[c],ROTATE(PLUS(x[b],x[a]), 9)); \
    x[d] = XOR(x[d],ROTATE(PLUS(x[c],x[b]),13)); \
    x[a] = XOR(x[a],ROTATE(PLUS(x[d],x[c]),18)); \
  } while (0)

/*
 * Produce one 64-byte output block from the 16-word input state.
 *
 * The input is copied into a working array, mixed by 10 iterations of eight
 * quarter-rounds each, added word-wise to the original input, and written
 * to output as sixteen little-endian 32-bit words.  input is not modified.
 */
static void salsa20_wordtobyte(u8 output[64],const u32 input[16])
{
  u32 x[16];
  int w;
  int pass;

  for (w = 0;w < 16;++w) x[w] = input[w];

  for (pass = 0;pass < 10;++pass) {
    /* first group of four quarter-rounds */
    SALSA20_QUARTERROUND( 0, 4, 8,12);
    SALSA20_QUARTERROUND( 5, 9,13, 1);
    SALSA20_QUARTERROUND(10,14, 2, 6);
    SALSA20_QUARTERROUND(15, 3, 7,11);
    /* second group of four quarter-rounds */
    SALSA20_QUARTERROUND( 0, 1, 2, 3);
    SALSA20_QUARTERROUND( 5, 6, 7, 4);
    SALSA20_QUARTERROUND(10,11, 8, 9);
    SALSA20_QUARTERROUND(15,12,13,14);
  }

  /* feed-forward: add the original input to the mixed state */
  for (w = 0;w < 16;++w) x[w] = PLUS(x[w],input[w]);

  /* serialise the state, four bytes per word, least significant byte first */
  for (w = 0;w < 16;++w) U32TO8_LITTLE(output + 4 * w,x[w]);
}

#undef SALSA20_QUARTERROUND
|
||||
|
||||
/* This implementation has no tables to set up, so initialisation is a no-op. */
void ECRYPT_init(void)
{
}
|
||||
|
||||
static const char sigma[16] = "expand 32-byte k";
|
||||
static const char tau[16] = "expand 16-byte k";
|
||||
|
||||
/*
 * Load the key and the matching expansion constant into the cipher state.
 *
 *   x      - cipher context; words 0-5 and 10-15 of x->input are written.
 *   k      - key bytes: 32 bytes when kbits == 256, otherwise 16 bytes.
 *   kbits  - key size in bits; any value other than 256 is handled as 128.
 *   ivbits - not used by this implementation.
 *
 * Words 1-4 receive the first 16 key bytes.  For a 256-bit key, words 11-14
 * receive the second 16 key bytes and the "sigma" constant is used; for a
 * 128-bit key the same 16 bytes are loaded again and "tau" is used.  The
 * constant fills words 0, 5, 10 and 15.
 */
void ECRYPT_keysetup(ECRYPT_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
{
  /*
   * Automatic storage, not static: the pointer is assigned on every call,
   * and a static would be shared state written by concurrent callers.
   */
  const char *constants;

  (void)ivbits;

  x->input[1] = U8TO32_LITTLE(k + 0);
  x->input[2] = U8TO32_LITTLE(k + 4);
  x->input[3] = U8TO32_LITTLE(k + 8);
  x->input[4] = U8TO32_LITTLE(k + 12);
  if (kbits == 256) { /* recommended */
    k += 16;
    constants = sigma;
  } else { /* kbits == 128 */
    constants = tau;
  }
  x->input[11] = U8TO32_LITTLE(k + 0);
  x->input[12] = U8TO32_LITTLE(k + 4);
  x->input[13] = U8TO32_LITTLE(k + 8);
  x->input[14] = U8TO32_LITTLE(k + 12);
  x->input[0] = U8TO32_LITTLE(constants + 0);
  x->input[5] = U8TO32_LITTLE(constants + 4);
  x->input[10] = U8TO32_LITTLE(constants + 8);
  x->input[15] = U8TO32_LITTLE(constants + 12);
}
|
||||
|
||||
/*
 * Start a new message: load the 8-byte IV into state words 6 and 7 and
 * reset the 64-bit block counter held in words 8 and 9 to zero.
 */
void ECRYPT_ivsetup(ECRYPT_ctx *x,const u8 *iv)
{
  u32 *state = x->input;

  state[6] = U8TO32_LITTLE(iv + 0);
  state[7] = U8TO32_LITTLE(iv + 4);
  state[8] = 0;
  state[9] = 0;
}
|
||||
|
||||
/*
 * XOR `bytes` bytes of m with keystream and store the result in c.
 *
 * One 64-byte keystream block is generated per loop iteration, and the block
 * counter in x->input[8] (carrying into x->input[9]) is advanced after every
 * generated block, including a final partial one.  Nothing happens when
 * bytes is zero.
 */
void ECRYPT_encrypt_bytes(ECRYPT_ctx *x,const u8 *m,u8 *c,u32 bytes)
{
  u8 block[64];
  u32 chunk;
  u32 j;

  if (bytes == 0) return;

  do {
    salsa20_wordtobyte(block,x->input);

    x->input[8] = PLUSONE(x->input[8]);
    if (x->input[8] == 0) {
      x->input[9] = PLUSONE(x->input[9]);
      /* stopping at 2^70 bytes per nonce is user's responsibility */
    }

    /* consume a whole block, or whatever is left of the message */
    chunk = (bytes <= 64) ? bytes : 64;
    for (j = 0;j < chunk;++j) c[j] = m[j] ^ block[j];

    bytes -= chunk;
    c += chunk;
    m += chunk;
  } while (bytes != 0);
}
|
||||
|
||||
/*
 * Decrypt `bytes` bytes of c into m.  Decryption applies the same keystream
 * XOR as encryption, so this forwards to ECRYPT_encrypt_bytes with the
 * ciphertext as input and the plaintext buffer as output.
 */
void ECRYPT_decrypt_bytes(ECRYPT_ctx *x,const u8 *c,u8 *m,u32 bytes)
{
  ECRYPT_encrypt_bytes(x, c, m, bytes);
}
|
||||
|
||||
/*
 * Fill stream[0..bytes-1] with raw keystream: the buffer is zeroed and then
 * encrypted in place, which leaves the keystream itself in the buffer.
 */
void ECRYPT_keystream_bytes(ECRYPT_ctx *x,u8 *stream,u32 bytes)
{
  u8 *cursor = stream;
  u32 remaining = bytes;

  while (remaining != 0) {
    *cursor++ = 0;
    --remaining;
  }
  ECRYPT_encrypt_bytes(x,stream,stream,bytes);
}
|
||||
5371
vendor/salsa20/salsa20.s
vendored
5371
vendor/salsa20/salsa20.s
vendored
File diff suppressed because it is too large
Load diff
12
vendor/scrypt-jane/CMakeLists.txt
vendored
12
vendor/scrypt-jane/CMakeLists.txt
vendored
|
|
@ -1,12 +0,0 @@
|
|||
# Static helper library wrapping scrypt-jane, built with the Salsa mix
# function and the SHA-256 hash selected at compile time.
add_library( scrypt
             scrypt-jane.c )

# Algorithm selection is a build detail of this target only, so the defines
# are attached to the target instead of the whole directory.
target_compile_definitions( scrypt PRIVATE
                            SCRYPT_SALSA
                            SCRYPT_SHA256 )

# The flag is for the C compiler, so test the C compiler's ID (the original
# tested the C++ compiler, which can differ in a mixed toolchain).
if( "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" )
  target_compile_options( scrypt PRIVATE -no-integrated-as )
endif()
|
||||
163
vendor/scrypt-jane/README.md
vendored
163
vendor/scrypt-jane/README.md
vendored
|
|
@ -1,163 +0,0 @@
|
|||
This project provides performant, flexible implementations of Colin Percival's [scrypt](http://www.tarsnap.com/scrypt.html).
|
||||
|
||||
# Features
|
||||
|
||||
## Modular Design
|
||||
|
||||
The code uses a modular (compile, not runtime) layout to allow new mixing & hash functions to be added easily. The base components (HMAC, PBKDF2, and scrypt) are static and will immediately work with any conforming mix or hash function.
|
||||
|
||||
## Supported Mix Functions
|
||||
|
||||
* [Salsa20/8](http://cr.yp.to/salsa20.html)
|
||||
* [ChaCha20/8](http://cr.yp.to/chacha.html)
|
||||
* [Salsa6420/8]()
|
||||
|
||||
I am not actually aware of any other candidates for a decent mix function. Salsa20/8 was nearly perfect, but its successor, ChaCha20/8, has better diffusion and is thus stronger, is potentially faster given advanced SIMD support (byte level shuffles, or a 32bit rotate), and is slightly cleaner to implement given that it requires no pre/post processing of data for SIMD implementations.
|
||||
|
||||
64-byte blocks are no longer assumed! Salsa6420/8 is a 'proof of concept' 64-bit version of Salsa20/8 with a 128 byte block, and rotation constants chosen to allow 32-bit word shuffles instead of rotations for two of the rotations which put it on par with ChaCha in terms of SSE implementation shortcuts.
|
||||
|
||||
## Supported Hash Functions
|
||||
|
||||
* SHA256/512
|
||||
* [BLAKE256/512](https://www.131002.net/blake/)
|
||||
* [Skein512](http://www.skein-hash.info/)
|
||||
* [Keccak256/512](http://keccak.noekeon.org/) (SHA-3)
|
||||
|
||||
Hash function implementations, unlike mix functions, are not optimized. The PBKDF2 computations are relatively minor in the scrypt algorithm, so including CPU specific versions, or vastly unrolling loops, would serve little purpose while bloating the code, both source and binary, and making it more confusing to implement correctly.
|
||||
|
||||
Most (now only two!) of the SHA-3 candidates fall into the "annoying to read/implement" category and have not been included yet. This will of course be moot once ~~BLAKE is chosen as SHA-3~~ Keccak is chosen as SHA-3. Well shit.
|
||||
|
||||
## CPU Adaptation
|
||||
|
||||
The mixing function specialization is selected at runtime based on what the CPU supports (well, x86/x86-64 for now, but theoretically any). On platforms where this is not needed, e.g. where packages are usually compiled from source, it can also select the most suitable implementation at compile time, cutting down on binary size.
|
||||
|
||||
For those who are familiar with the scrypt spec, the code specializes at the ROMix level, allowing all copy, and xor calls to be inlined efficiently. ***Update***: This is actually not as important as I switched from specializing at the mix() level and letting the compiler somewhat inefficiently inline block_copy and block_xor to specializing at ChunkMix(), where they can be inlined properly. I thought about specializing at ROMix(), but it would increase the complexity per mix function even more and would not present many more opportunities than what is generated by the compiler presently.
|
||||
|
||||
MSVC uses SSE intrinsics as opposed to inline assembly for the mix functions to allow the compiler to fully inline properly. Also, Visual Studio is not smart enough to allow inline assembly in 64-bit code.
|
||||
|
||||
## Self Testing
|
||||
|
||||
On first use, scrypt() runs a small series of tests to make sure the hash function, mix functions, and scrypt() itself, are generating correct results. It will exit() (or call a user defined fatal error function) should any of these tests fail.
|
||||
|
||||
Test vectors for individual mix and hash functions are generated from reference implementations. The only "official" test vectors for the full scrypt() are for SHA256 + Salsa20/8 of course; other combinations are generated from this code (once it works with all reference test vectors) and subject to change if any implementation errors are discovered.
|
||||
|
||||
# Performance (on an E5200 2.5GHZ)
|
||||
|
||||
Benchmarks are run _without_ allocating memory, i.e. allocating enough memory before the trials are run. Different allocators can have different costs and non-deterministic effects, which is not the point of comparing implementations. The only hash function compared will be SHA-256 to be comparable to Colin's reference implementation, and the hash function will generally be a fraction of a percent of noise in the overall result.
|
||||
|
||||
Three different scrypt settings are tested (the last two are from the scrypt paper):
|
||||
|
||||
* 'High Volume': N=4096, r=8, p=1, 4mb memory
|
||||
* 'Interactive': N=16384, r=8, p=1, 16mb memory
|
||||
* 'Non-Interactive': N=1048576, r=8, p=1, 1gb memory
|
||||
|
||||
__Note__: Benchmark settings are adjusted based on the underlying block size to keep memory usage consistent with default scrypt. This means Salsa64 has r=4 due to having a 128 byte block size. A 256 byte block size would have r=2, 512 byte block would have r=1, etc. Additionally, this means Salsa6420/8 is doing half the rounds/byte of default scrypt, but has 64 bit word mixing vs 32 bit, and thus does somewhat less overall mixing. Salsa6420/~10-12 would be needed to maintain equivalent overall mixing.
|
||||
|
||||
Cycle counts are in millions of cycles. All versions compiled with gcc 4.6.3, -O3. Sorted from fastest to slowest.
|
||||
|
||||
Scaling refers to how much more expensive 'Non-Interactive' is to compute than 'High Volume', normalized to "ideal" scaling (256x difficulty). Under 100% means it becomes easier to process as N grows, over 100% means it becomes more difficult to process as N grows.
|
||||
|
||||
|
||||
<table>
|
||||
<thead><tr><th>Implementation</th><th>Algo</th><th>High Volume</th><th>Interactive</th><th>Non-Interactive</th><th>Scaling</th></tr></thead>
|
||||
<tbody>
|
||||
|
||||
<tr><td>scrypt-jane SSSE3 64bit</td><td>Salsa6420/8 </td><td>18.2m</td><td> 75.6m</td><td>5120.0m</td><td>110.0%</td></tr>
|
||||
<tr><td>scrypt-jane SSSE3 64bit</td><td>ChaCha20/8 </td><td>19.6m</td><td> 79.6m</td><td>5296.7m</td><td>105.6%</td></tr>
|
||||
<tr><td>scrypt-jane SSSE3 32bit</td><td>ChaCha20/8 </td><td>19.8m</td><td> 80.3m</td><td>5346.1m</td><td>105.5%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 64bit </td><td>Salsa6420/8 </td><td>19.8m</td><td> 82.1m</td><td>5529.2m</td><td>109.1%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 64bit </td><td>Salsa20/8 </td><td>22.1m</td><td> 89.7m</td><td>5938.8m</td><td>105.0%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 32bit </td><td>Salsa20/8 </td><td>22.3m</td><td> 90.6m</td><td>6011.0m</td><td>105.3%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 64bit </td><td>ChaCha20/8 </td><td>23.9m</td><td> 96.8m</td><td>6399.7m</td><td>104.6%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 32bit </td><td>ChaCha20/8 </td><td>24.2m</td><td> 98.3m</td><td>6500.7m</td><td>104.9%</td></tr>
|
||||
<tr><td>*Reference SSE2 64bit* </td><td>Salsa20/8 </td><td>32.9m</td><td>135.2m</td><td>8881.6m</td><td>105.5%</td></tr>
|
||||
<tr><td>*Reference SSE2 32bit* </td><td>Salsa20/8 </td><td>33.0m</td><td>134.4m</td><td>8885.2m</td><td>105.2%</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
* scrypt-jane Salsa6420/8-SSSE3 is ~1.80x faster than reference Salsa20/8-SSE2 for High Volume, but drops to 1.73x faster for 'Non-Interactive' instead of remaining constant
|
||||
* scrypt-jane ChaCha20/8-SSSE3 is ~1.67x faster than reference Salsa20/8-SSE2
|
||||
* scrypt-jane Salsa20/8-SSE2 is ~1.48x faster than reference Salsa20/8-SSE2
|
||||
|
||||
# Performance (on a slightly noisy E3-1270 3.4GHZ)
|
||||
|
||||
All versions compiled with gcc 4.4.7, -O3. Sorted from fastest to slowest.
|
||||
|
||||
<table>
|
||||
<thead><tr><th>Implementation</th><th>Algo</th><th>High Volume</th><th>Interactive</th><th>Non-Interactive</th><th>Scaling</th></tr></thead>
|
||||
<tbody>
|
||||
<tr><td>scrypt-jane AVX 64bit </td><td>Salsa6420/8 </td><td>11.8m</td><td> 52.5m</td><td>3848.6m</td><td>127.4%</td></tr>
|
||||
<tr><td>scrypt-jane SSSE3 64bit </td><td>Salsa6420/8 </td><td>13.3m</td><td> 57.9m</td><td>4176.6m</td><td>122.7%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 64bit </td><td>Salsa6420/8 </td><td>14.2m</td><td> 61.1m</td><td>4382.4m</td><td>120.6%</td></tr>
|
||||
<tr><td>scrypt-jane AVX 64bit </td><td>ChaCha20/8 </td><td>18.0m</td><td> 77.4m</td><td>5396.8m</td><td>117.1%</td></tr>
|
||||
<tr><td>scrypt-jane AVX 32bit </td><td>ChaCha20/8 </td><td>18.3m</td><td> 82.1m</td><td>5421.8m</td><td>115.7%</td></tr>
|
||||
<tr><td>scrypt-jane SSSE3 64bit </td><td>ChaCha20/8 </td><td>19.0m</td><td> 81.3m</td><td>5600.7m</td><td>115.1%</td></tr>
|
||||
<tr><td>scrypt-jane AVX 64bit </td><td>Salsa20/8 </td><td>19.0m</td><td> 81.2m</td><td>5610.6m</td><td>115.3%</td></tr>
|
||||
<tr><td>scrypt-jane AVX 32bit </td><td>Salsa20/8 </td><td>19.0m</td><td> 81.3m</td><td>5621.6m</td><td>115.6%</td></tr>
|
||||
<tr><td>scrypt-jane SSSE3 32bit </td><td>ChaCha20/8 </td><td>19.1m</td><td> 81.8m</td><td>5621.6m</td><td>115.0%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 64bit </td><td>Salsa20/8 </td><td>19.5m</td><td> 83.8m</td><td>5772.9m</td><td>115.6%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 32bit </td><td>Salsa20/8 </td><td>19.6m</td><td> 84.0m</td><td>5793.9m</td><td>115.5%</td></tr>
|
||||
<tr><td>*Reference SSE2/AVX 64bit* </td><td>Salsa20/8 </td><td>21.5m</td><td> 90.4m</td><td>6147.1m</td><td>111.7%</td></tr>
|
||||
<tr><td>*Reference SSE2/AVX 32bit* </td><td>Salsa20/8 </td><td>22.3m</td><td> 94.0m</td><td>6267.7m</td><td>110.0%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 64bit </td><td>ChaCha20/8 </td><td>23.1m</td><td> 97.7m</td><td>6670.0m</td><td>112.8%</td></tr>
|
||||
<tr><td>scrypt-jane SSE2 32bit </td><td>ChaCha20/8 </td><td>23.3m</td><td> 98.4m</td><td>6728.7m</td><td>112.8%</td></tr>
|
||||
<tr><td>*Reference SSE2 64bit* </td><td>Salsa20/8 </td><td>30.4m</td><td>125.6m</td><td>8139.4m</td><td>104.6%</td></tr>
|
||||
<tr><td>*Reference SSE2 32bit* </td><td>Salsa20/8 </td><td>30.0m</td><td>124.5m</td><td>8469.3m</td><td>110.3%</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
* scrypt-jane Salsa6420/8-AVX is 1.60x - 1.82x faster than reference Salsa20/8-SSE2/AVX
|
||||
* scrypt-jane ChaCha20/8-AVX is 1.13x - 1.19x faster than reference Salsa20/8-SSE2/AVX
|
||||
* scrypt-jane Salsa20/8-AVX is 1.09x - 1.13x faster than reference Salsa20/8-SSE2/AVX
|
||||
|
||||
|
||||
# Building
|
||||
|
||||
[gcc,icc,clang] scrypt-jane.c -O3 -[m32,m64] -DSCRYPT_MIX -DSCRYPT_HASH -c
|
||||
|
||||
where SCRYPT_MIX is one of
|
||||
|
||||
* SCRYPT_SALSA
|
||||
* SCRYPT_SALSA64 (no optimized 32-bit implementation)
|
||||
* SCRYPT_CHACHA
|
||||
|
||||
and SCRYPT_HASH is one of
|
||||
|
||||
* SCRYPT_SHA256
|
||||
* SCRYPT_SHA512
|
||||
* SCRYPT_BLAKE256
|
||||
* SCRYPT_BLAKE512
|
||||
* SCRYPT_SKEIN512
|
||||
* SCRYPT_KECCAK256
|
||||
* SCRYPT_KECCAK512
|
||||
|
||||
e.g.
|
||||
|
||||
gcc scrypt-jane.c -O3 -DSCRYPT_CHACHA -DSCRYPT_BLAKE512 -c
|
||||
gcc example.c scrypt-jane.o -o example
|
||||
|
||||
clang *may* need "-no-integrated-as" as some? versions don't support ".intel_syntax"
|
||||
|
||||
# Using
|
||||
|
||||
#include "scrypt-jane.h"
|
||||
|
||||
scrypt(password, password_len, salt, salt_len, Nfactor, pfactor, rfactor, out, want_bytes);
|
||||
|
||||
## scrypt parameters
|
||||
|
||||
* Nfactor: Increases CPU & Memory Hardness
|
||||
* rfactor: Increases Memory Hardness
|
||||
* pfactor: Increases CPU Hardness
|
||||
|
||||
In scrypt terms
|
||||
|
||||
* N = (1 << (Nfactor + 1)), which controls how many times to mix each chunk, and how many temporary chunks are used. Increasing N increases both CPU time and memory used.
|
||||
* r = (1 << rfactor), which controls how many blocks are in a chunk (i.e., 2 * r blocks are in a chunk). Increasing r increases how much memory is used.
|
||||
* p = (1 << pfactor), which controls how many passes to perform over the set of N chunks. Increasing p increases CPU time used.
|
||||
|
||||
I chose to use the log2 of each parameter as it is the common way to communicate settings (e.g. 2^20, not 1048576).
|
||||
|
||||
# License
|
||||
|
||||
Public Domain, or MIT
|
||||
28
vendor/scrypt-jane/code/scrypt-conf.h
vendored
28
vendor/scrypt-jane/code/scrypt-conf.h
vendored
|
|
@ -1,28 +0,0 @@
|
|||
/*
|
||||
pick the best algo at runtime or compile time?
|
||||
----------------------------------------------
|
||||
SCRYPT_CHOOSE_COMPILETIME (gcc only!)
|
||||
SCRYPT_CHOOSE_RUNTIME
|
||||
*/
|
||||
#define SCRYPT_CHOOSE_RUNTIME
|
||||
|
||||
|
||||
/*
|
||||
hash function to use
|
||||
-------------------------------
|
||||
SCRYPT_BLAKE256
|
||||
SCRYPT_BLAKE512
|
||||
SCRYPT_SHA256
|
||||
SCRYPT_SHA512
|
||||
SCRYPT_SKEIN512
|
||||
*/
|
||||
//#define SCRYPT_SHA256
|
||||
|
||||
|
||||
/*
|
||||
block mixer to use
|
||||
-----------------------------
|
||||
SCRYPT_CHACHA
|
||||
SCRYPT_SALSA
|
||||
*/
|
||||
//#define SCRYPT_SALSA
|
||||
162
vendor/scrypt-jane/code/scrypt-jane-chacha.h
vendored
162
vendor/scrypt-jane/code/scrypt-jane-chacha.h
vendored
|
|
@ -1,162 +0,0 @@
|
|||
#define SCRYPT_MIX_BASE "ChaCha20/8"
|
||||
|
||||
typedef uint32_t scrypt_mix_word_t;
|
||||
|
||||
#define SCRYPT_WORDTO8_LE U32TO8_LE
|
||||
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
|
||||
|
||||
#define SCRYPT_BLOCK_BYTES 64
|
||||
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
|
||||
|
||||
/* must have these here in case block bytes is ever != 64 */
|
||||
#include "scrypt-jane-romix-basic.h"
|
||||
|
||||
#include "scrypt-jane-mix_chacha-xop.h"
|
||||
#include "scrypt-jane-mix_chacha-avx.h"
|
||||
#include "scrypt-jane-mix_chacha-ssse3.h"
|
||||
#include "scrypt-jane-mix_chacha-sse2.h"
|
||||
#include "scrypt-jane-mix_chacha.h"
|
||||
|
||||
#if defined(SCRYPT_CHACHA_XOP)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_xop
|
||||
#define SCRYPT_MIX_FN chacha_core_xop
|
||||
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_CHACHA_AVX)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_avx
|
||||
#define SCRYPT_MIX_FN chacha_core_avx
|
||||
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_CHACHA_SSSE3)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3
|
||||
#define SCRYPT_MIX_FN chacha_core_ssse3
|
||||
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_CHACHA_SSE2)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_sse2
|
||||
#define SCRYPT_MIX_FN chacha_core_sse2
|
||||
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
/* cpu agnostic */
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
|
||||
#define SCRYPT_MIX_FN chacha_core_basic
|
||||
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
static scrypt_ROMixfn
|
||||
scrypt_getROMix(void) {
|
||||
size_t cpuflags = detect_cpu();
|
||||
|
||||
#if defined(SCRYPT_CHACHA_XOP)
|
||||
if (cpuflags & cpu_xop)
|
||||
return scrypt_ROMix_xop;
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_CHACHA_AVX)
|
||||
if (cpuflags & cpu_avx)
|
||||
return scrypt_ROMix_avx;
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_CHACHA_SSSE3)
|
||||
if (cpuflags & cpu_ssse3)
|
||||
return scrypt_ROMix_ssse3;
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_CHACHA_SSE2)
|
||||
if (cpuflags & cpu_sse2)
|
||||
return scrypt_ROMix_sse2;
|
||||
else
|
||||
#endif
|
||||
|
||||
return scrypt_ROMix_basic;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(SCRYPT_TEST_SPEED)
/*
 * Report which compiled-in ChaCha variants the current CPU can run.
 *
 * Returns the OR of cpu_xop / cpu_avx / cpu_ssse3 / cpu_sse2 for every
 * variant that is enabled at compile time and whose flag is present in
 * the value returned by detect_cpu().
 */
static size_t
available_implementations(void) {
	const size_t cpu = detect_cpu();
	size_t found = 0;

#if defined(SCRYPT_CHACHA_XOP)
	found |= (cpu & cpu_xop) ? cpu_xop : 0;
#endif

#if defined(SCRYPT_CHACHA_AVX)
	found |= (cpu & cpu_avx) ? cpu_avx : 0;
#endif

#if defined(SCRYPT_CHACHA_SSSE3)
	found |= (cpu & cpu_ssse3) ? cpu_ssse3 : 0;
#endif

#if defined(SCRYPT_CHACHA_SSE2)
	found |= (cpu & cpu_sse2) ? cpu_sse2 : 0;
#endif

	return found;
}
#endif
|
||||
|
||||
/*
 * Self-test for the ChaCha chunk-mix variants.
 *
 * Runs scrypt_test_mix_instance() against the shared 16-byte reference
 * value for every variant that is compiled in and supported by the CPU
 * (plus the basic variant when SCRYPT_CHACHA_BASIC is defined) and ANDs
 * the individual results together.  Returns 1 if no variant was run.
 */
static int
scrypt_test_mix(void) {
	static const uint8_t expected[16] = {
		0x48,0x2b,0x2d,0xb8,0xa1,0x33,0x22,0x73,0xcd,0x16,0xc4,0xb4,0xb0,0x7f,0xb1,0x8a,
	};

	const size_t cpu = detect_cpu();
	int ok = 1;

#if defined(SCRYPT_CHACHA_XOP)
	if (cpu & cpu_xop) {
		ok &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, scrypt_romix_nop, scrypt_romix_nop, expected);
	}
#endif

#if defined(SCRYPT_CHACHA_AVX)
	if (cpu & cpu_avx) {
		ok &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, scrypt_romix_nop, scrypt_romix_nop, expected);
	}
#endif

#if defined(SCRYPT_CHACHA_SSSE3)
	if (cpu & cpu_ssse3) {
		ok &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, scrypt_romix_nop, scrypt_romix_nop, expected);
	}
#endif

#if defined(SCRYPT_CHACHA_SSE2)
	if (cpu & cpu_sse2) {
		ok &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, scrypt_romix_nop, scrypt_romix_nop, expected);
	}
#endif

#if defined(SCRYPT_CHACHA_BASIC)
	/* the portable variant is tested with the endian-converting tangle/untangle pair */
	ok &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected);
#endif

	return ok;
}
|
||||
|
||||
48
vendor/scrypt-jane/code/scrypt-jane-hash.h
vendored
48
vendor/scrypt-jane/code/scrypt-jane-hash.h
vendored
|
|
@ -1,48 +0,0 @@
|
|||
#if defined(SCRYPT_BLAKE512)
|
||||
#include "scrypt-jane-hash_blake512.h"
|
||||
#elif defined(SCRYPT_BLAKE256)
|
||||
#include "scrypt-jane-hash_blake256.h"
|
||||
#elif defined(SCRYPT_SHA512)
|
||||
#include "scrypt-jane-hash_sha512.h"
|
||||
#elif defined(SCRYPT_SHA256)
|
||||
#include "scrypt-jane-hash_sha256.h"
|
||||
#elif defined(SCRYPT_SKEIN512)
|
||||
#include "scrypt-jane-hash_skein512.h"
|
||||
#elif defined(SCRYPT_KECCAK512) || defined(SCRYPT_KECCAK256)
|
||||
#include "scrypt-jane-hash_keccak.h"
|
||||
#else
|
||||
#define SCRYPT_HASH "ERROR"
|
||||
#define SCRYPT_HASH_BLOCK_SIZE 64
|
||||
#define SCRYPT_HASH_DIGEST_SIZE 64
|
||||
typedef struct scrypt_hash_state_t { size_t dummy; } scrypt_hash_state;
|
||||
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
|
||||
static void scrypt_hash_init(scrypt_hash_state *S) {}
|
||||
static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {}
|
||||
static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {}
|
||||
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0};
|
||||
#error must define a hash function!
|
||||
#endif
|
||||
|
||||
#include "scrypt-jane-pbkdf2.h"
|
||||
|
||||
#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */
|
||||
|
||||
static int
|
||||
scrypt_test_hash(void) {
|
||||
scrypt_hash_state st;
|
||||
scrypt_hash_digest hash, final;
|
||||
uint8_t msg[SCRYPT_TEST_HASH_LEN];
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < SCRYPT_TEST_HASH_LEN; i++)
|
||||
msg[i] = (uint8_t)i;
|
||||
|
||||
scrypt_hash_init(&st);
|
||||
for (i = 0; i < SCRYPT_TEST_HASH_LEN + 1; i++) {
|
||||
scrypt_hash(hash, msg, i);
|
||||
scrypt_hash_update(&st, hash, sizeof(hash));
|
||||
}
|
||||
scrypt_hash_finish(&st, final);
|
||||
return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE);
|
||||
}
|
||||
|
||||
177
vendor/scrypt-jane/code/scrypt-jane-hash_blake256.h
vendored
177
vendor/scrypt-jane/code/scrypt-jane-hash_blake256.h
vendored
|
|
@ -1,177 +0,0 @@
|
|||
#define SCRYPT_HASH "BLAKE-256"
|
||||
#define SCRYPT_HASH_BLOCK_SIZE 64
|
||||
#define SCRYPT_HASH_DIGEST_SIZE 32
|
||||
|
||||
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
|
||||
|
||||
const uint8_t blake256_sigma[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
|
||||
14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3,
|
||||
11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4,
|
||||
7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8,
|
||||
9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13,
|
||||
2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9,
|
||||
12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11,
|
||||
13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10,
|
||||
6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5,
|
||||
10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13 ,0,
|
||||
};
|
||||
|
||||
const uint32_t blake256_constants[16] = {
|
||||
0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
|
||||
0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917
|
||||
};
|
||||
|
||||
typedef struct scrypt_hash_state_t {
|
||||
uint32_t H[8], T[2];
|
||||
uint32_t leftover;
|
||||
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
|
||||
} scrypt_hash_state;
|
||||
|
||||
static void
|
||||
blake256_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
|
||||
const uint8_t *sigma, *sigma_end = blake256_sigma + (10 * 16);
|
||||
uint32_t m[16], v[16], h[8], t[2];
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < 8; i++) h[i] = S->H[i];
|
||||
for (i = 0; i < 2; i++) t[i] = S->T[i];
|
||||
|
||||
while (blocks--) {
|
||||
t[0] += 512;
|
||||
t[1] += (t[0] < 512) ? 1 : 0;
|
||||
|
||||
for (i = 0; i < 8; i++) v[i ] = h[i];
|
||||
for (i = 0; i < 4; i++) v[i + 8] = blake256_constants[i];
|
||||
for (i = 0; i < 2; i++) v[i + 12] = blake256_constants[i+4] ^ t[0];
|
||||
for (i = 0; i < 2; i++) v[i + 14] = blake256_constants[i+6] ^ t[1];
|
||||
|
||||
for (i = 0; i < 16; i++) m[i] = U8TO32_BE(&in[i * 4]);
|
||||
in += 64;
|
||||
|
||||
#define G(a,b,c,d,e) \
|
||||
v[a] += (m[sigma[e+0]] ^ blake256_constants[sigma[e+1]]) + v[b]; \
|
||||
v[d] = ROTR32(v[d] ^ v[a],16); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR32(v[b] ^ v[c],12); \
|
||||
v[a] += (m[sigma[e+1]] ^ blake256_constants[sigma[e+0]]) + v[b]; \
|
||||
v[d] = ROTR32(v[d] ^ v[a], 8); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR32(v[b] ^ v[c], 7);
|
||||
|
||||
for (i = 0, sigma = blake256_sigma; i < 14; i++) {
|
||||
G(0, 4, 8,12, 0);
|
||||
G(1, 5, 9,13, 2);
|
||||
G(2, 6,10,14, 4);
|
||||
G(3, 7,11,15, 6);
|
||||
|
||||
G(0, 5,10,15, 8);
|
||||
G(1, 6,11,12,10);
|
||||
G(2, 7, 8,13,12);
|
||||
G(3, 4, 9,14,14);
|
||||
|
||||
sigma += 16;
|
||||
if (sigma == sigma_end)
|
||||
sigma = blake256_sigma;
|
||||
}
|
||||
|
||||
#undef G
|
||||
|
||||
for (i = 0; i < 8; i++) h[i] ^= (v[i] ^ v[i + 8]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++) S->H[i] = h[i];
|
||||
for (i = 0; i < 2; i++) S->T[i] = t[i];
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_init(scrypt_hash_state *S) {
|
||||
S->H[0] = 0x6a09e667ULL;
|
||||
S->H[1] = 0xbb67ae85ULL;
|
||||
S->H[2] = 0x3c6ef372ULL;
|
||||
S->H[3] = 0xa54ff53aULL;
|
||||
S->H[4] = 0x510e527fULL;
|
||||
S->H[5] = 0x9b05688cULL;
|
||||
S->H[6] = 0x1f83d9abULL;
|
||||
S->H[7] = 0x5be0cd19ULL;
|
||||
S->T[0] = 0;
|
||||
S->T[1] = 0;
|
||||
S->leftover = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
||||
size_t blocks, want;
|
||||
|
||||
/* handle the previous data */
|
||||
if (S->leftover) {
|
||||
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
want = (want < inlen) ? want : inlen;
|
||||
memcpy(S->buffer + S->leftover, in, want);
|
||||
S->leftover += (uint32_t)want;
|
||||
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
|
||||
return;
|
||||
in += want;
|
||||
inlen -= want;
|
||||
blake256_blocks(S, S->buffer, 1);
|
||||
}
|
||||
|
||||
/* handle the current data */
|
||||
blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
|
||||
S->leftover = (uint32_t)(inlen - blocks);
|
||||
if (blocks) {
|
||||
blake256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
|
||||
in += blocks;
|
||||
}
|
||||
|
||||
/* handle leftover data */
|
||||
if (S->leftover)
|
||||
memcpy(S->buffer, in, S->leftover);
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
|
||||
uint32_t th, tl, bits;
|
||||
|
||||
bits = (S->leftover << 3);
|
||||
tl = S->T[0] + bits;
|
||||
th = S->T[1];
|
||||
if (S->leftover == 0) {
|
||||
S->T[0] = (uint32_t)0 - (uint32_t)512;
|
||||
S->T[1] = (uint32_t)0 - (uint32_t)1;
|
||||
} else if (S->T[0] == 0) {
|
||||
S->T[0] = ((uint32_t)0 - (uint32_t)512) + bits;
|
||||
S->T[1] = S->T[1] - 1;
|
||||
} else {
|
||||
S->T[0] -= (512 - bits);
|
||||
}
|
||||
|
||||
S->buffer[S->leftover] = 0x80;
|
||||
if (S->leftover <= 55) {
|
||||
memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover);
|
||||
} else {
|
||||
memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover);
|
||||
blake256_blocks(S, S->buffer, 1);
|
||||
S->T[0] = (uint32_t)0 - (uint32_t)512;
|
||||
S->T[1] = (uint32_t)0 - (uint32_t)1;
|
||||
memset(S->buffer, 0, 56);
|
||||
}
|
||||
S->buffer[55] |= 1;
|
||||
U32TO8_BE(S->buffer + 56, th);
|
||||
U32TO8_BE(S->buffer + 60, tl);
|
||||
blake256_blocks(S, S->buffer, 1);
|
||||
|
||||
U32TO8_BE(&hash[ 0], S->H[0]);
|
||||
U32TO8_BE(&hash[ 4], S->H[1]);
|
||||
U32TO8_BE(&hash[ 8], S->H[2]);
|
||||
U32TO8_BE(&hash[12], S->H[3]);
|
||||
U32TO8_BE(&hash[16], S->H[4]);
|
||||
U32TO8_BE(&hash[20], S->H[5]);
|
||||
U32TO8_BE(&hash[24], S->H[6]);
|
||||
U32TO8_BE(&hash[28], S->H[7]);
|
||||
}
|
||||
|
||||
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
|
||||
0xcc,0xa9,0x1e,0xa9,0x20,0x97,0x37,0x40,0x17,0xc0,0xa0,0x52,0x87,0xfc,0x08,0x20,
|
||||
0x40,0xf5,0x81,0x86,0x62,0x75,0x78,0xb2,0x79,0xce,0xde,0x27,0x3c,0x7f,0x85,0xd8,
|
||||
};
|
||||
181
vendor/scrypt-jane/code/scrypt-jane-hash_blake512.h
vendored
181
vendor/scrypt-jane/code/scrypt-jane-hash_blake512.h
vendored
|
|
@ -1,181 +0,0 @@
|
|||
#define SCRYPT_HASH "BLAKE-512"
|
||||
#define SCRYPT_HASH_BLOCK_SIZE 128
|
||||
#define SCRYPT_HASH_DIGEST_SIZE 64
|
||||
|
||||
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
|
||||
|
||||
const uint8_t blake512_sigma[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
|
||||
14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3,
|
||||
11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4,
|
||||
7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8,
|
||||
9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13,
|
||||
2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9,
|
||||
12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11,
|
||||
13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10,
|
||||
6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5,
|
||||
10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13 ,0,
|
||||
};
|
||||
|
||||
const uint64_t blake512_constants[16] = {
|
||||
0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL, 0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL,
|
||||
0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL, 0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL,
|
||||
0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL, 0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL,
|
||||
0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL, 0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
|
||||
};
|
||||
|
||||
typedef struct scrypt_hash_state_t {
|
||||
uint64_t H[8], T[2];
|
||||
uint32_t leftover;
|
||||
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
|
||||
} scrypt_hash_state;
|
||||
|
||||
static void
|
||||
blake512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
|
||||
const uint8_t *sigma, *sigma_end = blake512_sigma + (10 * 16);
|
||||
uint64_t m[16], v[16], h[8], t[2];
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < 8; i++) h[i] = S->H[i];
|
||||
for (i = 0; i < 2; i++) t[i] = S->T[i];
|
||||
|
||||
while (blocks--) {
|
||||
t[0] += 1024;
|
||||
t[1] += (t[0] < 1024) ? 1 : 0;
|
||||
|
||||
for (i = 0; i < 8; i++) v[i ] = h[i];
|
||||
for (i = 0; i < 4; i++) v[i + 8] = blake512_constants[i];
|
||||
for (i = 0; i < 2; i++) v[i + 12] = blake512_constants[i+4] ^ t[0];
|
||||
for (i = 0; i < 2; i++) v[i + 14] = blake512_constants[i+6] ^ t[1];
|
||||
|
||||
for (i = 0; i < 16; i++) m[i] = U8TO64_BE(&in[i * 8]);
|
||||
in += 128;
|
||||
|
||||
#define G(a,b,c,d,e) \
|
||||
v[a] += (m[sigma[e+0]] ^ blake512_constants[sigma[e+1]]) + v[b]; \
|
||||
v[d] = ROTR64(v[d] ^ v[a],32); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c],25); \
|
||||
v[a] += (m[sigma[e+1]] ^ blake512_constants[sigma[e+0]]) + v[b]; \
|
||||
v[d] = ROTR64(v[d] ^ v[a],16); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c],11);
|
||||
|
||||
for (i = 0, sigma = blake512_sigma; i < 16; i++) {
|
||||
G(0, 4, 8,12, 0);
|
||||
G(1, 5, 9,13, 2);
|
||||
G(2, 6,10,14, 4);
|
||||
G(3, 7,11,15, 6);
|
||||
G(0, 5,10,15, 8);
|
||||
G(1, 6,11,12,10);
|
||||
G(2, 7, 8,13,12);
|
||||
G(3, 4, 9,14,14);
|
||||
|
||||
sigma += 16;
|
||||
if (sigma == sigma_end)
|
||||
sigma = blake512_sigma;
|
||||
}
|
||||
|
||||
#undef G
|
||||
|
||||
for (i = 0; i < 8; i++) h[i] ^= (v[i] ^ v[i + 8]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++) S->H[i] = h[i];
|
||||
for (i = 0; i < 2; i++) S->T[i] = t[i];
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_init(scrypt_hash_state *S) {
|
||||
S->H[0] = 0x6a09e667f3bcc908ULL;
|
||||
S->H[1] = 0xbb67ae8584caa73bULL;
|
||||
S->H[2] = 0x3c6ef372fe94f82bULL;
|
||||
S->H[3] = 0xa54ff53a5f1d36f1ULL;
|
||||
S->H[4] = 0x510e527fade682d1ULL;
|
||||
S->H[5] = 0x9b05688c2b3e6c1fULL;
|
||||
S->H[6] = 0x1f83d9abfb41bd6bULL;
|
||||
S->H[7] = 0x5be0cd19137e2179ULL;
|
||||
S->T[0] = 0;
|
||||
S->T[1] = 0;
|
||||
S->leftover = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
||||
size_t blocks, want;
|
||||
|
||||
/* handle the previous data */
|
||||
if (S->leftover) {
|
||||
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
want = (want < inlen) ? want : inlen;
|
||||
memcpy(S->buffer + S->leftover, in, want);
|
||||
S->leftover += (uint32_t)want;
|
||||
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
|
||||
return;
|
||||
in += want;
|
||||
inlen -= want;
|
||||
blake512_blocks(S, S->buffer, 1);
|
||||
}
|
||||
|
||||
/* handle the current data */
|
||||
blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
|
||||
S->leftover = (uint32_t)(inlen - blocks);
|
||||
if (blocks) {
|
||||
blake512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
|
||||
in += blocks;
|
||||
}
|
||||
|
||||
/* handle leftover data */
|
||||
if (S->leftover)
|
||||
memcpy(S->buffer, in, S->leftover);
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
|
||||
uint64_t th, tl;
|
||||
size_t bits;
|
||||
|
||||
bits = (S->leftover << 3);
|
||||
tl = S->T[0] + bits;
|
||||
th = S->T[1];
|
||||
if (S->leftover == 0) {
|
||||
S->T[0] = (uint64_t)0 - (uint64_t)1024;
|
||||
S->T[1] = (uint64_t)0 - (uint64_t)1;
|
||||
} else if (S->T[0] == 0) {
|
||||
S->T[0] = ((uint64_t)0 - (uint64_t)1024) + bits;
|
||||
S->T[1] = S->T[1] - 1;
|
||||
} else {
|
||||
S->T[0] -= (1024 - bits);
|
||||
}
|
||||
|
||||
S->buffer[S->leftover] = 0x80;
|
||||
if (S->leftover <= 111) {
|
||||
memset(S->buffer + S->leftover + 1, 0, 111 - S->leftover);
|
||||
} else {
|
||||
memset(S->buffer + S->leftover + 1, 0, 127 - S->leftover);
|
||||
blake512_blocks(S, S->buffer, 1);
|
||||
S->T[0] = (uint64_t)0 - (uint64_t)1024;
|
||||
S->T[1] = (uint64_t)0 - (uint64_t)1;
|
||||
memset(S->buffer, 0, 112);
|
||||
}
|
||||
S->buffer[111] |= 1;
|
||||
U64TO8_BE(S->buffer + 112, th);
|
||||
U64TO8_BE(S->buffer + 120, tl);
|
||||
blake512_blocks(S, S->buffer, 1);
|
||||
|
||||
U64TO8_BE(&hash[ 0], S->H[0]);
|
||||
U64TO8_BE(&hash[ 8], S->H[1]);
|
||||
U64TO8_BE(&hash[16], S->H[2]);
|
||||
U64TO8_BE(&hash[24], S->H[3]);
|
||||
U64TO8_BE(&hash[32], S->H[4]);
|
||||
U64TO8_BE(&hash[40], S->H[5]);
|
||||
U64TO8_BE(&hash[48], S->H[6]);
|
||||
U64TO8_BE(&hash[56], S->H[7]);
|
||||
}
|
||||
|
||||
/* Reference digest for this back end (presumably checked by the library self-test). */
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
	0x2f, 0x9d, 0x5b, 0xbe, 0x24, 0x0d, 0x63, 0xd3, 0xa0, 0xac, 0x4f, 0xd3, 0x01, 0xc0, 0x23, 0x6f,
	0x6d, 0xdf, 0x6e, 0xfb, 0x60, 0x6f, 0xa0, 0x74, 0xdf, 0x9f, 0x25, 0x65, 0xb6, 0x11, 0x0a, 0x83,
	0x23, 0x96, 0xba, 0x91, 0x68, 0x4b, 0x85, 0x15, 0x13, 0x54, 0xba, 0x19, 0xf3, 0x2c, 0x5a, 0x4a,
	0x1f, 0x78, 0x31, 0x02, 0xc9, 0x1e, 0x56, 0xc4, 0x54, 0xca, 0xf9, 0x8f, 0x2c, 0x7f, 0x85, 0xac
};
|
||||
168
vendor/scrypt-jane/code/scrypt-jane-hash_keccak.h
vendored
168
vendor/scrypt-jane/code/scrypt-jane-hash_keccak.h
vendored
|
|
@ -1,168 +0,0 @@
|
|||
/* Keccak back end: SCRYPT_KECCAK256 selects the 32-byte digest, otherwise 64 bytes. */
#if defined(SCRYPT_KECCAK256)
	#define SCRYPT_HASH "Keccak-256"
	#define SCRYPT_HASH_DIGEST_SIZE 32
#else
	#define SCRYPT_HASH "Keccak-512"
	#define SCRYPT_HASH_DIGEST_SIZE 64
#endif

/* permutation width, capacity and rate in bits; the block size is the rate in bytes */
#define SCRYPT_KECCAK_F 1600
#define SCRYPT_KECCAK_C (SCRYPT_HASH_DIGEST_SIZE * 8 * 2) /* 256=512, 512=1024 */
#define SCRYPT_KECCAK_R (SCRYPT_KECCAK_F - SCRYPT_KECCAK_C) /* 256=1088, 512=576 */
#define SCRYPT_HASH_BLOCK_SIZE (SCRYPT_KECCAK_R / 8)

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

typedef struct scrypt_hash_state_t {
	uint64_t state[SCRYPT_KECCAK_F / 64];     /* 25 sponge lanes */
	uint32_t leftover;                        /* bytes waiting in buffer */
	uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];   /* partial-block staging area */
} scrypt_hash_state;
|
||||
|
||||
/* Per-round constants XORed into lane 0 by the iota step of keccak_block. */
static const uint64_t keccak_round_constants[24] = {
	0x0000000000000001ull, 0x0000000000008082ull,
	0x800000000000808aull, 0x8000000080008000ull,
	0x000000000000808bull, 0x0000000080000001ull,
	0x8000000080008081ull, 0x8000000000008009ull,
	0x000000000000008aull, 0x0000000000000088ull,
	0x0000000080008009ull, 0x000000008000000aull,
	0x000000008000808bull, 0x800000000000008bull,
	0x8000000000008089ull, 0x8000000000008003ull,
	0x8000000000008002ull, 0x8000000000000080ull,
	0x000000000000800aull, 0x800000008000000aull,
	0x8000000080008081ull, 0x8000000000008080ull,
	0x0000000080000001ull, 0x8000000080008008ull
};
|
||||
|
||||
static void
|
||||
keccak_block(scrypt_hash_state *S, const uint8_t *in) {
|
||||
size_t i;
|
||||
uint64_t *s = S->state, t[5], u[5], v, w;
|
||||
|
||||
/* absorb input */
|
||||
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE / 8; i++, in += 8)
|
||||
s[i] ^= U8TO64_LE(in);
|
||||
|
||||
for (i = 0; i < 24; i++) {
|
||||
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
|
||||
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
|
||||
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
|
||||
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
|
||||
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
|
||||
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
|
||||
|
||||
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
|
||||
u[0] = t[4] ^ ROTL64(t[1], 1);
|
||||
u[1] = t[0] ^ ROTL64(t[2], 1);
|
||||
u[2] = t[1] ^ ROTL64(t[3], 1);
|
||||
u[3] = t[2] ^ ROTL64(t[4], 1);
|
||||
u[4] = t[3] ^ ROTL64(t[0], 1);
|
||||
|
||||
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
|
||||
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
|
||||
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
|
||||
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
|
||||
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
|
||||
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
|
||||
|
||||
/* rho pi: b[..] = rotl(a[..], ..) */
|
||||
v = s[ 1];
|
||||
s[ 1] = ROTL64(s[ 6], 44);
|
||||
s[ 6] = ROTL64(s[ 9], 20);
|
||||
s[ 9] = ROTL64(s[22], 61);
|
||||
s[22] = ROTL64(s[14], 39);
|
||||
s[14] = ROTL64(s[20], 18);
|
||||
s[20] = ROTL64(s[ 2], 62);
|
||||
s[ 2] = ROTL64(s[12], 43);
|
||||
s[12] = ROTL64(s[13], 25);
|
||||
s[13] = ROTL64(s[19], 8);
|
||||
s[19] = ROTL64(s[23], 56);
|
||||
s[23] = ROTL64(s[15], 41);
|
||||
s[15] = ROTL64(s[ 4], 27);
|
||||
s[ 4] = ROTL64(s[24], 14);
|
||||
s[24] = ROTL64(s[21], 2);
|
||||
s[21] = ROTL64(s[ 8], 55);
|
||||
s[ 8] = ROTL64(s[16], 45);
|
||||
s[16] = ROTL64(s[ 5], 36);
|
||||
s[ 5] = ROTL64(s[ 3], 28);
|
||||
s[ 3] = ROTL64(s[18], 21);
|
||||
s[18] = ROTL64(s[17], 15);
|
||||
s[17] = ROTL64(s[11], 10);
|
||||
s[11] = ROTL64(s[ 7], 6);
|
||||
s[ 7] = ROTL64(s[10], 3);
|
||||
s[10] = ROTL64( v, 1);
|
||||
|
||||
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
|
||||
v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w;
|
||||
v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w;
|
||||
v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
|
||||
v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
|
||||
v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
|
||||
|
||||
/* iota: a[0,0] ^= round constant */
|
||||
s[0] ^= keccak_round_constants[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_init(scrypt_hash_state *S) {
|
||||
memset(S, 0, sizeof(*S));
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
||||
size_t want;
|
||||
|
||||
/* handle the previous data */
|
||||
if (S->leftover) {
|
||||
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
want = (want < inlen) ? want : inlen;
|
||||
memcpy(S->buffer + S->leftover, in, want);
|
||||
S->leftover += (uint32_t)want;
|
||||
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
|
||||
return;
|
||||
in += want;
|
||||
inlen -= want;
|
||||
keccak_block(S, S->buffer);
|
||||
}
|
||||
|
||||
/* handle the current data */
|
||||
while (inlen >= SCRYPT_HASH_BLOCK_SIZE) {
|
||||
keccak_block(S, in);
|
||||
in += SCRYPT_HASH_BLOCK_SIZE;
|
||||
inlen -= SCRYPT_HASH_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* handle leftover data */
|
||||
S->leftover = (uint32_t)inlen;
|
||||
if (S->leftover)
|
||||
memcpy(S->buffer, in, S->leftover);
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
|
||||
size_t i;
|
||||
|
||||
S->buffer[S->leftover] = 0x01;
|
||||
memset(S->buffer + (S->leftover + 1), 0, SCRYPT_HASH_BLOCK_SIZE - (S->leftover + 1));
|
||||
S->buffer[SCRYPT_HASH_BLOCK_SIZE - 1] |= 0x80;
|
||||
keccak_block(S, S->buffer);
|
||||
|
||||
for (i = 0; i < SCRYPT_HASH_DIGEST_SIZE; i += 8) {
|
||||
U64TO8_LE(&hash[i], S->state[i / 8]);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(SCRYPT_KECCAK256)
|
||||
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
|
||||
0x26,0xb7,0x10,0xb3,0x66,0xb1,0xd1,0xb1,0x25,0xfc,0x3e,0xe3,0x1e,0x33,0x1d,0x19,
|
||||
0x94,0xaa,0x63,0x7a,0xd5,0x77,0x29,0xb4,0x27,0xe9,0xe0,0xf4,0x19,0xba,0x68,0xea,
|
||||
};
|
||||
#else
|
||||
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
|
||||
0x17,0xc7,0x8c,0xa0,0xd9,0x08,0x1d,0xba,0x8a,0xc8,0x3e,0x07,0x90,0xda,0x91,0x88,
|
||||
0x25,0xbd,0xd3,0xf8,0x78,0x4a,0x8d,0x5e,0xe4,0x96,0x9c,0x01,0xf3,0xeb,0xdc,0x12,
|
||||
0xea,0x35,0x57,0xba,0x94,0xb8,0xe9,0xb9,0x27,0x45,0x0a,0x48,0x5c,0x3d,0x69,0xf0,
|
||||
0xdb,0x22,0x38,0xb5,0x52,0x22,0x29,0xea,0x7a,0xb2,0xe6,0x07,0xaa,0x37,0x4d,0xe6,
|
||||
};
|
||||
#endif
|
||||
|
||||
135
vendor/scrypt-jane/code/scrypt-jane-hash_sha256.h
vendored
135
vendor/scrypt-jane/code/scrypt-jane-hash_sha256.h
vendored
|
|
@ -1,135 +0,0 @@
|
|||
/* SHA-2-256 back end: 64-byte blocks, 32-byte digest. */
#define SCRYPT_HASH "SHA-2-256"
#define SCRYPT_HASH_BLOCK_SIZE 64
#define SCRYPT_HASH_DIGEST_SIZE 32

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

typedef struct scrypt_hash_state_t {
	uint32_t H[8];                            /* chaining value */
	uint64_t T;                               /* bits compressed so far */
	uint32_t leftover;                        /* bytes waiting in buffer */
	uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];   /* partial-block staging area */
} scrypt_hash_state;
|
||||
|
||||
/* The 64 per-round additive constants consumed by STEP(i). */
static const uint32_t sha256_constants[64] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
|
||||
|
||||
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
|
||||
#define Maj(x,y,z) (((x | y) & z) | (x & y))
|
||||
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
|
||||
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
|
||||
#define G0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ (x >> 3))
|
||||
#define G1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10))
|
||||
#define W0(in,i) (U8TO32_BE(&in[i * 4]))
|
||||
#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16])
|
||||
#define STEP(i) \
|
||||
t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \
|
||||
t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha256_constants[i] + w[i]; \
|
||||
r[7] = r[6]; \
|
||||
r[6] = r[5]; \
|
||||
r[5] = r[4]; \
|
||||
r[4] = r[3] + t0; \
|
||||
r[3] = r[2]; \
|
||||
r[2] = r[1]; \
|
||||
r[1] = r[0]; \
|
||||
r[0] = t0 + t1;
|
||||
|
||||
static void
|
||||
sha256_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
|
||||
uint32_t r[8], w[64], t0, t1;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < 8; i++) r[i] = S->H[i];
|
||||
|
||||
while (blocks--) {
|
||||
for (i = 0; i < 16; i++) { w[i] = W0(in, i); }
|
||||
for (i = 16; i < 64; i++) { w[i] = W1(i); }
|
||||
for (i = 0; i < 64; i++) { STEP(i); }
|
||||
for (i = 0; i < 8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; }
|
||||
S->T += SCRYPT_HASH_BLOCK_SIZE * 8;
|
||||
in += SCRYPT_HASH_BLOCK_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_init(scrypt_hash_state *S) {
|
||||
S->H[0] = 0x6a09e667;
|
||||
S->H[1] = 0xbb67ae85;
|
||||
S->H[2] = 0x3c6ef372;
|
||||
S->H[3] = 0xa54ff53a;
|
||||
S->H[4] = 0x510e527f;
|
||||
S->H[5] = 0x9b05688c;
|
||||
S->H[6] = 0x1f83d9ab;
|
||||
S->H[7] = 0x5be0cd19;
|
||||
S->T = 0;
|
||||
S->leftover = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
||||
size_t blocks, want;
|
||||
|
||||
/* handle the previous data */
|
||||
if (S->leftover) {
|
||||
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
want = (want < inlen) ? want : inlen;
|
||||
memcpy(S->buffer + S->leftover, in, want);
|
||||
S->leftover += (uint32_t)want;
|
||||
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
|
||||
return;
|
||||
in += want;
|
||||
inlen -= want;
|
||||
sha256_blocks(S, S->buffer, 1);
|
||||
}
|
||||
|
||||
/* handle the current data */
|
||||
blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
|
||||
S->leftover = (uint32_t)(inlen - blocks);
|
||||
if (blocks) {
|
||||
sha256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
|
||||
in += blocks;
|
||||
}
|
||||
|
||||
/* handle leftover data */
|
||||
if (S->leftover)
|
||||
memcpy(S->buffer, in, S->leftover);
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
|
||||
uint64_t t = S->T + (S->leftover * 8);
|
||||
|
||||
S->buffer[S->leftover] = 0x80;
|
||||
if (S->leftover <= 55) {
|
||||
memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover);
|
||||
} else {
|
||||
memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover);
|
||||
sha256_blocks(S, S->buffer, 1);
|
||||
memset(S->buffer, 0, 56);
|
||||
}
|
||||
|
||||
U64TO8_BE(S->buffer + 56, t);
|
||||
sha256_blocks(S, S->buffer, 1);
|
||||
|
||||
U32TO8_BE(&hash[ 0], S->H[0]);
|
||||
U32TO8_BE(&hash[ 4], S->H[1]);
|
||||
U32TO8_BE(&hash[ 8], S->H[2]);
|
||||
U32TO8_BE(&hash[12], S->H[3]);
|
||||
U32TO8_BE(&hash[16], S->H[4]);
|
||||
U32TO8_BE(&hash[20], S->H[5]);
|
||||
U32TO8_BE(&hash[24], S->H[6]);
|
||||
U32TO8_BE(&hash[28], S->H[7]);
|
||||
}
|
||||
|
||||
/* Reference digest for this back end (presumably checked by the library self-test). */
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
	0xee, 0x36, 0xae, 0xa6, 0x65, 0xf0, 0x28, 0x7d, 0xc9, 0xde, 0xd8, 0xad, 0x48, 0x33, 0x7d, 0xbf,
	0xcb, 0xc0, 0x48, 0xfa, 0x5f, 0x92, 0xfd, 0x0a, 0x95, 0x6f, 0x34, 0x8e, 0x8c, 0x1e, 0x73, 0xad,
};
|
||||
152
vendor/scrypt-jane/code/scrypt-jane-hash_sha512.h
vendored
152
vendor/scrypt-jane/code/scrypt-jane-hash_sha512.h
vendored
|
|
@ -1,152 +0,0 @@
|
|||
/* SHA-2-512 back end: 128-byte blocks, 64-byte digest. */
#define SCRYPT_HASH "SHA-2-512"
#define SCRYPT_HASH_BLOCK_SIZE 128
#define SCRYPT_HASH_DIGEST_SIZE 64

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

typedef struct scrypt_hash_state_t {
	uint64_t H[8];                            /* chaining value */
	uint64_t T[2];                            /* bits compressed so far: T[0] low word, T[1] carries */
	uint32_t leftover;                        /* bytes waiting in buffer */
	uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];   /* partial-block staging area */
} scrypt_hash_state;
|
||||
|
||||
/* The 80 per-round additive constants consumed by STEP(i). */
static const uint64_t sha512_constants[80] = {
	0x428a2f98d728ae22ull, 0x7137449123ef65cdull, 0xb5c0fbcfec4d3b2full, 0xe9b5dba58189dbbcull,
	0x3956c25bf348b538ull, 0x59f111f1b605d019ull, 0x923f82a4af194f9bull, 0xab1c5ed5da6d8118ull,
	0xd807aa98a3030242ull, 0x12835b0145706fbeull, 0x243185be4ee4b28cull, 0x550c7dc3d5ffb4e2ull,
	0x72be5d74f27b896full, 0x80deb1fe3b1696b1ull, 0x9bdc06a725c71235ull, 0xc19bf174cf692694ull,
	0xe49b69c19ef14ad2ull, 0xefbe4786384f25e3ull, 0x0fc19dc68b8cd5b5ull, 0x240ca1cc77ac9c65ull,
	0x2de92c6f592b0275ull, 0x4a7484aa6ea6e483ull, 0x5cb0a9dcbd41fbd4ull, 0x76f988da831153b5ull,
	0x983e5152ee66dfabull, 0xa831c66d2db43210ull, 0xb00327c898fb213full, 0xbf597fc7beef0ee4ull,
	0xc6e00bf33da88fc2ull, 0xd5a79147930aa725ull, 0x06ca6351e003826full, 0x142929670a0e6e70ull,
	0x27b70a8546d22ffcull, 0x2e1b21385c26c926ull, 0x4d2c6dfc5ac42aedull, 0x53380d139d95b3dfull,
	0x650a73548baf63deull, 0x766a0abb3c77b2a8ull, 0x81c2c92e47edaee6ull, 0x92722c851482353bull,
	0xa2bfe8a14cf10364ull, 0xa81a664bbc423001ull, 0xc24b8b70d0f89791ull, 0xc76c51a30654be30ull,
	0xd192e819d6ef5218ull, 0xd69906245565a910ull, 0xf40e35855771202aull, 0x106aa07032bbd1b8ull,
	0x19a4c116b8d2d0c8ull, 0x1e376c085141ab53ull, 0x2748774cdf8eeb99ull, 0x34b0bcb5e19b48a8ull,
	0x391c0cb3c5c95a63ull, 0x4ed8aa4ae3418acbull, 0x5b9cca4f7763e373ull, 0x682e6ff3d6b2b8a3ull,
	0x748f82ee5defb2fcull, 0x78a5636f43172f60ull, 0x84c87814a1f0ab72ull, 0x8cc702081a6439ecull,
	0x90befffa23631e28ull, 0xa4506cebde82bde9ull, 0xbef9a3f7b2c67915ull, 0xc67178f2e372532bull,
	0xca273eceea26619cull, 0xd186b8c721c0c207ull, 0xeada7dd6cde0eb1eull, 0xf57d4f7fee6ed178ull,
	0x06f067aa72176fbaull, 0x0a637dc5a2c898a6ull, 0x113f9804bef90daeull, 0x1b710b35131c471bull,
	0x28db77f523047d84ull, 0x32caab7b40c72493ull, 0x3c9ebe0a15c9bebcull, 0x431d67c49c100d4cull,
	0x4cc5d4becb3e42b6ull, 0x597f299cfc657e2aull, 0x5fcb6fab3ad6faecull, 0x6c44198c4a475817ull
};
|
||||
|
||||
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
|
||||
#define Maj(x,y,z) (((x | y) & z) | (x & y))
|
||||
#define S0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
|
||||
#define S1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
|
||||
#define G0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ (x >> 7))
|
||||
#define G1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ (x >> 6))
|
||||
#define W0(in,i) (U8TO64_BE(&in[i * 8]))
|
||||
#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16])
|
||||
#define STEP(i) \
|
||||
t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \
|
||||
t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha512_constants[i] + w[i]; \
|
||||
r[7] = r[6]; \
|
||||
r[6] = r[5]; \
|
||||
r[5] = r[4]; \
|
||||
r[4] = r[3] + t0; \
|
||||
r[3] = r[2]; \
|
||||
r[2] = r[1]; \
|
||||
r[1] = r[0]; \
|
||||
r[0] = t0 + t1;
|
||||
|
||||
static void
|
||||
sha512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
|
||||
uint64_t r[8], w[80], t0, t1;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < 8; i++) r[i] = S->H[i];
|
||||
|
||||
while (blocks--) {
|
||||
for (i = 0; i < 16; i++) { w[i] = W0(in, i); }
|
||||
for (i = 16; i < 80; i++) { w[i] = W1(i); }
|
||||
for (i = 0; i < 80; i++) { STEP(i); }
|
||||
for (i = 0; i < 8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; }
|
||||
S->T[0] += SCRYPT_HASH_BLOCK_SIZE * 8;
|
||||
S->T[1] += (!S->T[0]) ? 1 : 0;
|
||||
in += SCRYPT_HASH_BLOCK_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_init(scrypt_hash_state *S) {
|
||||
S->H[0] = 0x6a09e667f3bcc908ull;
|
||||
S->H[1] = 0xbb67ae8584caa73bull;
|
||||
S->H[2] = 0x3c6ef372fe94f82bull;
|
||||
S->H[3] = 0xa54ff53a5f1d36f1ull;
|
||||
S->H[4] = 0x510e527fade682d1ull;
|
||||
S->H[5] = 0x9b05688c2b3e6c1full;
|
||||
S->H[6] = 0x1f83d9abfb41bd6bull;
|
||||
S->H[7] = 0x5be0cd19137e2179ull;
|
||||
S->T[0] = 0;
|
||||
S->T[1] = 0;
|
||||
S->leftover = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
||||
size_t blocks, want;
|
||||
|
||||
/* handle the previous data */
|
||||
if (S->leftover) {
|
||||
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
want = (want < inlen) ? want : inlen;
|
||||
memcpy(S->buffer + S->leftover, in, want);
|
||||
S->leftover += (uint32_t)want;
|
||||
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
|
||||
return;
|
||||
in += want;
|
||||
inlen -= want;
|
||||
sha512_blocks(S, S->buffer, 1);
|
||||
}
|
||||
|
||||
/* handle the current data */
|
||||
blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
|
||||
S->leftover = (uint32_t)(inlen - blocks);
|
||||
if (blocks) {
|
||||
sha512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
|
||||
in += blocks;
|
||||
}
|
||||
|
||||
/* handle leftover data */
|
||||
if (S->leftover)
|
||||
memcpy(S->buffer, in, S->leftover);
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
|
||||
uint64_t t0 = S->T[0] + (S->leftover * 8), t1 = S->T[1];
|
||||
|
||||
S->buffer[S->leftover] = 0x80;
|
||||
if (S->leftover <= 111) {
|
||||
memset(S->buffer + S->leftover + 1, 0, 111 - S->leftover);
|
||||
} else {
|
||||
memset(S->buffer + S->leftover + 1, 0, 127 - S->leftover);
|
||||
sha512_blocks(S, S->buffer, 1);
|
||||
memset(S->buffer, 0, 112);
|
||||
}
|
||||
|
||||
U64TO8_BE(S->buffer + 112, t1);
|
||||
U64TO8_BE(S->buffer + 120, t0);
|
||||
sha512_blocks(S, S->buffer, 1);
|
||||
|
||||
U64TO8_BE(&hash[ 0], S->H[0]);
|
||||
U64TO8_BE(&hash[ 8], S->H[1]);
|
||||
U64TO8_BE(&hash[16], S->H[2]);
|
||||
U64TO8_BE(&hash[24], S->H[3]);
|
||||
U64TO8_BE(&hash[32], S->H[4]);
|
||||
U64TO8_BE(&hash[40], S->H[5]);
|
||||
U64TO8_BE(&hash[48], S->H[6]);
|
||||
U64TO8_BE(&hash[56], S->H[7]);
|
||||
}
|
||||
|
||||
/* Reference digest for this back end (presumably checked by the library self-test). */
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
	0xba, 0xc3, 0x80, 0x2b, 0x24, 0x56, 0x95, 0x1f, 0x19, 0x7c, 0xa2, 0xd3, 0x72, 0x7c, 0x9a, 0x4d,
	0x1d, 0x50, 0x3a, 0xa9, 0x12, 0x27, 0xd8, 0xe1, 0xbe, 0x76, 0x53, 0x87, 0x5a, 0x1e, 0x82, 0xec,
	0xc8, 0xe1, 0x6b, 0x87, 0xd0, 0xb5, 0x25, 0x7e, 0xe8, 0x1e, 0xd7, 0x58, 0xc6, 0x2d, 0xc2, 0x9c,
	0x06, 0x31, 0x8f, 0x5b, 0x57, 0x8e, 0x76, 0xba, 0xd5, 0xf6, 0xec, 0xfe, 0x85, 0x1f, 0x34, 0x0c,
};
|
||||
188
vendor/scrypt-jane/code/scrypt-jane-hash_skein512.h
vendored
188
vendor/scrypt-jane/code/scrypt-jane-hash_skein512.h
vendored
|
|
@ -1,188 +0,0 @@
|
|||
/* Skein-512 back end: 64-byte blocks, 64-byte digest. */
#define SCRYPT_HASH "Skein-512"
#define SCRYPT_HASH_BLOCK_SIZE 64
#define SCRYPT_HASH_DIGEST_SIZE 64

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

typedef struct scrypt_hash_state_t {
	uint64_t X[8];                            /* chaining value */
	uint64_t T[2];                            /* tweak: T[0] byte position, T[1] flag bits */
	uint32_t leftover;                        /* bytes waiting in buffer */
	uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];   /* staging area for the not-yet-processed tail */
} scrypt_hash_state;

/* NOTE(review): nothing in the visible part of this header uses stdio -- confirm before removing */
#include <stdio.h>
|
||||
|
||||
static void
|
||||
skein512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks, size_t add) {
|
||||
uint64_t X[8], key[8], Xt[9+18], T[3+1];
|
||||
size_t r;
|
||||
|
||||
while (blocks--) {
|
||||
T[0] = S->T[0] + add;
|
||||
T[1] = S->T[1];
|
||||
T[2] = T[0] ^ T[1];
|
||||
key[0] = U8TO64_LE(in + 0); Xt[0] = S->X[0]; X[0] = key[0] + Xt[0];
|
||||
key[1] = U8TO64_LE(in + 8); Xt[1] = S->X[1]; X[1] = key[1] + Xt[1];
|
||||
key[2] = U8TO64_LE(in + 16); Xt[2] = S->X[2]; X[2] = key[2] + Xt[2];
|
||||
key[3] = U8TO64_LE(in + 24); Xt[3] = S->X[3]; X[3] = key[3] + Xt[3];
|
||||
key[4] = U8TO64_LE(in + 32); Xt[4] = S->X[4]; X[4] = key[4] + Xt[4];
|
||||
key[5] = U8TO64_LE(in + 40); Xt[5] = S->X[5]; X[5] = key[5] + Xt[5] + T[0];
|
||||
key[6] = U8TO64_LE(in + 48); Xt[6] = S->X[6]; X[6] = key[6] + Xt[6] + T[1];
|
||||
key[7] = U8TO64_LE(in + 56); Xt[7] = S->X[7]; X[7] = key[7] + Xt[7];
|
||||
Xt[8] = 0x1BD11BDAA9FC1A22ull ^ Xt[0] ^ Xt[1] ^ Xt[2] ^ Xt[3] ^ Xt[4] ^ Xt[5] ^ Xt[6] ^ Xt[7];
|
||||
in += SCRYPT_HASH_BLOCK_SIZE;
|
||||
|
||||
for (r = 0; r < 18; r++)
|
||||
Xt[r + 9] = Xt[r + 0];
|
||||
|
||||
for (r = 0; r < 18; r += 2) {
|
||||
X[0] += X[1]; X[1] = ROTL64(X[1], 46) ^ X[0];
|
||||
X[2] += X[3]; X[3] = ROTL64(X[3], 36) ^ X[2];
|
||||
X[4] += X[5]; X[5] = ROTL64(X[5], 19) ^ X[4];
|
||||
X[6] += X[7]; X[7] = ROTL64(X[7], 37) ^ X[6];
|
||||
X[2] += X[1]; X[1] = ROTL64(X[1], 33) ^ X[2];
|
||||
X[0] += X[3]; X[3] = ROTL64(X[3], 42) ^ X[0];
|
||||
X[6] += X[5]; X[5] = ROTL64(X[5], 14) ^ X[6];
|
||||
X[4] += X[7]; X[7] = ROTL64(X[7], 27) ^ X[4];
|
||||
X[4] += X[1]; X[1] = ROTL64(X[1], 17) ^ X[4];
|
||||
X[6] += X[3]; X[3] = ROTL64(X[3], 49) ^ X[6];
|
||||
X[0] += X[5]; X[5] = ROTL64(X[5], 36) ^ X[0];
|
||||
X[2] += X[7]; X[7] = ROTL64(X[7], 39) ^ X[2];
|
||||
X[6] += X[1]; X[1] = ROTL64(X[1], 44) ^ X[6];
|
||||
X[4] += X[3]; X[3] = ROTL64(X[3], 56) ^ X[4];
|
||||
X[2] += X[5]; X[5] = ROTL64(X[5], 54) ^ X[2];
|
||||
X[0] += X[7]; X[7] = ROTL64(X[7], 9) ^ X[0];
|
||||
|
||||
X[0] += Xt[r + 1];
|
||||
X[1] += Xt[r + 2];
|
||||
X[2] += Xt[r + 3];
|
||||
X[3] += Xt[r + 4];
|
||||
X[4] += Xt[r + 5];
|
||||
X[5] += Xt[r + 6] + T[1];
|
||||
X[6] += Xt[r + 7] + T[2];
|
||||
X[7] += Xt[r + 8] + r + 1;
|
||||
|
||||
T[3] = T[0];
|
||||
T[0] = T[1];
|
||||
T[1] = T[2];
|
||||
T[2] = T[3];
|
||||
|
||||
X[0] += X[1]; X[1] = ROTL64(X[1], 39) ^ X[0];
|
||||
X[2] += X[3]; X[3] = ROTL64(X[3], 30) ^ X[2];
|
||||
X[4] += X[5]; X[5] = ROTL64(X[5], 34) ^ X[4];
|
||||
X[6] += X[7]; X[7] = ROTL64(X[7], 24) ^ X[6];
|
||||
X[2] += X[1]; X[1] = ROTL64(X[1], 13) ^ X[2];
|
||||
X[0] += X[3]; X[3] = ROTL64(X[3], 17) ^ X[0];
|
||||
X[6] += X[5]; X[5] = ROTL64(X[5], 10) ^ X[6];
|
||||
X[4] += X[7]; X[7] = ROTL64(X[7], 50) ^ X[4];
|
||||
X[4] += X[1]; X[1] = ROTL64(X[1], 25) ^ X[4];
|
||||
X[6] += X[3]; X[3] = ROTL64(X[3], 29) ^ X[6];
|
||||
X[0] += X[5]; X[5] = ROTL64(X[5], 39) ^ X[0];
|
||||
X[2] += X[7]; X[7] = ROTL64(X[7], 43) ^ X[2];
|
||||
X[6] += X[1]; X[1] = ROTL64(X[1], 8) ^ X[6];
|
||||
X[4] += X[3]; X[3] = ROTL64(X[3], 22) ^ X[4];
|
||||
X[2] += X[5]; X[5] = ROTL64(X[5], 56) ^ X[2];
|
||||
X[0] += X[7]; X[7] = ROTL64(X[7], 35) ^ X[0];
|
||||
|
||||
X[0] += Xt[r + 2];
|
||||
X[1] += Xt[r + 3];
|
||||
X[2] += Xt[r + 4];
|
||||
X[3] += Xt[r + 5];
|
||||
X[4] += Xt[r + 6];
|
||||
X[5] += Xt[r + 7] + T[1];
|
||||
X[6] += Xt[r + 8] + T[2];
|
||||
X[7] += Xt[r + 9] + r + 2;
|
||||
|
||||
T[3] = T[0];
|
||||
T[0] = T[1];
|
||||
T[1] = T[2];
|
||||
T[2] = T[3];
|
||||
}
|
||||
|
||||
S->X[0] = key[0] ^ X[0];
|
||||
S->X[1] = key[1] ^ X[1];
|
||||
S->X[2] = key[2] ^ X[2];
|
||||
S->X[3] = key[3] ^ X[3];
|
||||
S->X[4] = key[4] ^ X[4];
|
||||
S->X[5] = key[5] ^ X[5];
|
||||
S->X[6] = key[6] ^ X[6];
|
||||
S->X[7] = key[7] ^ X[7];
|
||||
|
||||
S->T[0] = T[0];
|
||||
S->T[1] = T[1] & ~0x4000000000000000ull;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_init(scrypt_hash_state *S) {
|
||||
S->X[0] = 0x4903ADFF749C51CEull;
|
||||
S->X[1] = 0x0D95DE399746DF03ull;
|
||||
S->X[2] = 0x8FD1934127C79BCEull;
|
||||
S->X[3] = 0x9A255629FF352CB1ull;
|
||||
S->X[4] = 0x5DB62599DF6CA7B0ull;
|
||||
S->X[5] = 0xEABE394CA9D5C3F4ull;
|
||||
S->X[6] = 0x991112C71A75B523ull;
|
||||
S->X[7] = 0xAE18A40B660FCC33ull;
|
||||
S->T[0] = 0x0000000000000000ull;
|
||||
S->T[1] = 0x7000000000000000ull;
|
||||
S->leftover = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
||||
size_t blocks, want;
|
||||
|
||||
/* skein processes the final <=64 bytes raw, so we can only update if there are at least 64+1 bytes available */
|
||||
if ((S->leftover + inlen) > SCRYPT_HASH_BLOCK_SIZE) {
|
||||
/* handle the previous data, we know there is enough for at least one block */
|
||||
if (S->leftover) {
|
||||
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
memcpy(S->buffer + S->leftover, in, want);
|
||||
in += want;
|
||||
inlen -= want;
|
||||
S->leftover = 0;
|
||||
skein512_blocks(S, S->buffer, 1, SCRYPT_HASH_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/* handle the current data if there's more than one block */
|
||||
if (inlen > SCRYPT_HASH_BLOCK_SIZE) {
|
||||
blocks = ((inlen - 1) & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
|
||||
skein512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE, SCRYPT_HASH_BLOCK_SIZE);
|
||||
inlen -= blocks;
|
||||
in += blocks;
|
||||
}
|
||||
}
|
||||
|
||||
/* handle leftover data */
|
||||
memcpy(S->buffer + S->leftover, in, inlen);
|
||||
S->leftover += inlen;
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
|
||||
memset(S->buffer + S->leftover, 0, SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
S->T[1] |= 0x8000000000000000ull;
|
||||
skein512_blocks(S, S->buffer, 1, S->leftover);
|
||||
|
||||
memset(S->buffer, 0, SCRYPT_HASH_BLOCK_SIZE);
|
||||
S->T[0] = 0;
|
||||
S->T[1] = 0xff00000000000000ull;
|
||||
skein512_blocks(S, S->buffer, 1, 8);
|
||||
|
||||
U64TO8_LE(&hash[ 0], S->X[0]);
|
||||
U64TO8_LE(&hash[ 8], S->X[1]);
|
||||
U64TO8_LE(&hash[16], S->X[2]);
|
||||
U64TO8_LE(&hash[24], S->X[3]);
|
||||
U64TO8_LE(&hash[32], S->X[4]);
|
||||
U64TO8_LE(&hash[40], S->X[5]);
|
||||
U64TO8_LE(&hash[48], S->X[6]);
|
||||
U64TO8_LE(&hash[56], S->X[7]);
|
||||
}
|
||||
|
||||
|
||||
/* Reference digest for this back end (presumably checked by the library self-test). */
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
	0x4d, 0x52, 0x29, 0xff, 0x10, 0xbc, 0xd2, 0x62, 0xd1, 0x61, 0x83, 0xc8, 0xe6, 0xf0, 0x83, 0xc4,
	0x9f, 0xf5, 0x6a, 0x42, 0x75, 0x2a, 0x26, 0x4e, 0xf0, 0x28, 0x72, 0x28, 0x47, 0xe8, 0x23, 0xdf,
	0x1e, 0x64, 0xf1, 0x51, 0x38, 0x35, 0x9d, 0xc2, 0x83, 0xfc, 0x35, 0x4e, 0xc0, 0x52, 0x5f, 0x41,
	0x6a, 0x0b, 0x7d, 0xf5, 0xce, 0x98, 0xde, 0x6f, 0x36, 0xd8, 0x51, 0x15, 0x78, 0x78, 0x93, 0x67,
};
|
||||
368
vendor/scrypt-jane/code/scrypt-jane-mix_chacha-avx.h
vendored
368
vendor/scrypt-jane/code/scrypt-jane-mix_chacha-avx.h
vendored
|
|
@ -1,368 +0,0 @@
|
|||
/* x86 */
|
||||
#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_AVX

/*
 * 32-bit x86 AVX ChunkMix: walks the 2*r 64-byte blocks of Bin (optionally
 * XORed with Bxor when it is non-NULL), chaining each block through an
 * 8-round ChaCha core, and stores even-numbered results in the first half
 * of Bout and odd-numbered results in the second half.
 *
 * Register use: edi = Bout, esi = Bin, eax = Bxor (reloaded every block),
 * edx = chunk size in bytes (r * 128), ecx = byte offset of the current
 * block, ebx = 0 or edx (toggles per block to pick the output half),
 * xmm0-3 = working state, xmm4/xmm5 = vpshufb masks, [esp] = 64-byte
 * aligned copy of the block input for the final add.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_avx)
	/* prologue: four pushes put the first argument at [ebp+20] */
	a1(push ebx)
	a1(push edi)
	a1(push esi)
	a1(push ebp)
	a2(mov ebp,esp)
	a2(mov edi,[ebp+20])
	a2(mov esi,[ebp+24])
	a2(mov eax,[ebp+28])
	a2(mov ebx,[ebp+32])
	/* 64 bytes of scratch, aligned to 64 */
	a2(sub esp,64)
	a2(and esp,~63)
	/* edx = r * 2 * 64, ecx = offset of the last block */
	a2(lea edx,[ebx*2])
	a2(shl edx,6)
	a2(lea ecx,[edx-64])
	/* ZF = (Bxor == NULL); consumed by the jz below, nothing in between writes flags */
	a2(and eax, eax)
	/* build the byte-shuffle masks: xmm4 rotates each dword left by 16, xmm5 by 8 */
	a2(mov ebx, 0x01000302)
	a2(vmovd xmm4, ebx)
	a2(mov ebx, 0x05040706)
	a2(vmovd xmm0, ebx)
	a2(mov ebx, 0x09080b0a)
	a2(vmovd xmm1, ebx)
	a2(mov ebx, 0x0d0c0f0e)
	a2(vmovd xmm2, ebx)
	a2(mov ebx, 0x02010003)
	a2(vmovd xmm5, ebx)
	a2(mov ebx, 0x06050407)
	a2(vmovd xmm3, ebx)
	a2(mov ebx, 0x0a09080b)
	a2(vmovd xmm6, ebx)
	a2(mov ebx, 0x0e0d0c0f)
	a2(vmovd xmm7, ebx)
	a3(vpunpckldq xmm4, xmm4, xmm0)
	a3(vpunpckldq xmm5, xmm5, xmm3)
	a3(vpunpckldq xmm1, xmm1, xmm2)
	a3(vpunpckldq xmm6, xmm6, xmm7)
	a3(vpunpcklqdq xmm4, xmm4, xmm1)
	a3(vpunpcklqdq xmm5, xmm5, xmm6)
	/* start the chain from the last block of the input */
	a2(vmovdqa xmm0,[ecx+esi+0])
	a2(vmovdqa xmm1,[ecx+esi+16])
	a2(vmovdqa xmm2,[ecx+esi+32])
	a2(vmovdqa xmm3,[ecx+esi+48])
	aj(jz scrypt_ChunkMix_avx_no_xor1)
	a3(vpxor xmm0,xmm0,[ecx+eax+0])
	a3(vpxor xmm1,xmm1,[ecx+eax+16])
	a3(vpxor xmm2,xmm2,[ecx+eax+32])
	a3(vpxor xmm3,xmm3,[ecx+eax+48])
a1(scrypt_ChunkMix_avx_no_xor1:)
	a2(xor ecx,ecx)
	a2(xor ebx,ebx)
a1(scrypt_ChunkMix_avx_loop:)
	/* per block: state ^= Bin[block] (and ^= Bxor[block] when Bxor != NULL) */
	a2(and eax, eax)
	a3(vpxor xmm0,xmm0,[esi+ecx+0])
	a3(vpxor xmm1,xmm1,[esi+ecx+16])
	a3(vpxor xmm2,xmm2,[esi+ecx+32])
	a3(vpxor xmm3,xmm3,[esi+ecx+48])
	aj(jz scrypt_ChunkMix_avx_no_xor2)
	a3(vpxor xmm0,xmm0,[eax+ecx+0])
	a3(vpxor xmm1,xmm1,[eax+ecx+16])
	a3(vpxor xmm2,xmm2,[eax+ecx+32])
	a3(vpxor xmm3,xmm3,[eax+ecx+48])
a1(scrypt_ChunkMix_avx_no_xor2:)
	/* keep the core input for the feed-forward add */
	a2(vmovdqa [esp+0],xmm0)
	a2(vmovdqa [esp+16],xmm1)
	a2(vmovdqa [esp+32],xmm2)
	a2(vmovdqa [esp+48],xmm3)
	/* 8 rounds, two per loop iteration */
	a2(mov eax,8)
a1(scrypt_chacha_avx_loop: )
	a3(vpaddd xmm0,xmm0,xmm1)
	a3(vpxor xmm3,xmm3,xmm0)
	a3(vpshufb xmm3,xmm3,xmm4)
	a3(vpaddd xmm2,xmm2,xmm3)
	a3(vpxor xmm1,xmm1,xmm2)
	a3(vpsrld xmm6,xmm1,20)
	a3(vpslld xmm1,xmm1,12)
	a3(vpxor xmm1,xmm1,xmm6)
	a3(vpaddd xmm0,xmm0,xmm1)
	a3(vpxor xmm3,xmm3,xmm0)
	a3(vpshufb xmm3,xmm3,xmm5)
	a3(vpshufd xmm0,xmm0,0x93)
	a3(vpaddd xmm2,xmm2,xmm3)
	a3(vpshufd xmm3,xmm3,0x4e)
	a3(vpxor xmm1,xmm1,xmm2)
	a3(vpshufd xmm2,xmm2,0x39)
	a3(vpsrld xmm6,xmm1,25)
	a3(vpslld xmm1,xmm1,7)
	a3(vpxor xmm1,xmm1,xmm6)
	a3(vpaddd xmm0,xmm0,xmm1)
	a3(vpxor xmm3,xmm3,xmm0)
	a3(vpshufb xmm3,xmm3,xmm4)
	a3(vpaddd xmm2,xmm2,xmm3)
	a3(vpxor xmm1,xmm1,xmm2)
	a3(vpsrld xmm6,xmm1,20)
	a3(vpslld xmm1,xmm1,12)
	a3(vpxor xmm1,xmm1,xmm6)
	a3(vpaddd xmm0,xmm0,xmm1)
	a3(vpxor xmm3,xmm3,xmm0)
	a3(vpshufb xmm3,xmm3,xmm5)
	a3(vpshufd xmm0,xmm0,0x39)
	a3(vpaddd xmm2,xmm2,xmm3)
	a3(vpshufd xmm3,xmm3,0x4e)
	a3(vpxor xmm1,xmm1,xmm2)
	a3(vpshufd xmm2,xmm2,0x93)
	a3(vpsrld xmm6,xmm1,25)
	a3(vpslld xmm1,xmm1,7)
	a3(vpxor xmm1,xmm1,xmm6)
	a2(sub eax,2)
	aj(ja scrypt_chacha_avx_loop)
	/* feed-forward: add the saved core input */
	a3(vpaddd xmm0,xmm0,[esp+0])
	a3(vpaddd xmm1,xmm1,[esp+16])
	a3(vpaddd xmm2,xmm2,[esp+32])
	a3(vpaddd xmm3,xmm3,[esp+48])
	/* output offset = ((ebx + ecx) & ~0x7f) / 2: even blocks fill the first half, odd blocks the second */
	a2(lea eax,[ebx+ecx])
	a2(xor ebx,edx)
	a2(and eax,~0x7f)
	a2(add ecx,64)
	a2(shr eax,1)
	a2(add eax, edi)
	/* flags from this cmp survive the stores and the mov and drive the jne */
	a2(cmp ecx,edx)
	a2(vmovdqa [eax+0],xmm0)
	a2(vmovdqa [eax+16],xmm1)
	a2(vmovdqa [eax+32],xmm2)
	a2(vmovdqa [eax+48],xmm3)
	/* eax was used as scratch: reload Bxor for the next block */
	a2(mov eax,[ebp+28])
	aj(jne scrypt_ChunkMix_avx_loop)
	/* epilogue */
	a2(mov esp,ebp)
	a1(pop ebp)
	a1(pop esi)
	a1(pop edi)
	a1(pop ebx)
	aret(16)
asm_naked_fn_end(scrypt_ChunkMix_avx)

#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_CHACHA_AVX
|
||||
|
||||
/*
 * x86-64 hand-written AVX ChunkMix built on the ChaCha/8 core.
 * Register use as read from the body: rdi = output base, rsi = input base,
 * rdx = optional xor input (skipped when zero), ecx = r.
 * NOTE(review): the argument-to-register mapping is presumably arranged by
 * asm_naked_fn / the calling-convention macros, which are defined elsewhere - confirm.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx)
|
||||
/* rcx = r*2, then *64 below: total chunk size in bytes (64 bytes are processed per block) */
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
/* r9 = byte offset of the last block; rax / r9 then point at that block in the input and xor buffers */
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* sets ZF from rdx for the jz further down; none of the loads/moves in between write the flags */
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
/*
 * Build two vpshufb masks (low qword from r8, high qword from rax):
 * xmm4 rotates every 32-bit lane left by 16 bits, xmm5 rotates it left by 8.
 * NOTE(review): movd with a 64-bit GPR relies on the assembler emitting the
 * 64-bit (movq) form - confirm for the target toolchain.
 */
a2(mov r8, 0x0504070601000302)
|
||||
a2(mov rax, 0x0d0c0f0e09080b0a)
|
||||
a2(movd xmm4, r8)
|
||||
a2(movd xmm6, rax)
|
||||
a2(mov r8, 0x0605040702010003)
|
||||
a2(mov rax, 0x0e0d0c0f0a09080b)
|
||||
a2(movd xmm5, r8)
|
||||
a2(movd xmm7, rax)
|
||||
a3(vpunpcklqdq xmm4, xmm4, xmm6)
|
||||
a3(vpunpcklqdq xmm5, xmm5, xmm7)
|
||||
/* skip the xor with the last block of the xor buffer when rdx was zero */
aj(jz scrypt_ChunkMix_avx_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_avx_no_xor1:)
|
||||
/* r9 = byte offset of the current block; r8 alternates between 0 and rcx on each pass */
a2(xor r8,r8)
|
||||
a2(xor r9,r9)
|
||||
a1(scrypt_ChunkMix_avx_loop:)
|
||||
/* re-test rdx; the four vpxor below leave the flags untouched for the jz */
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_avx_no_xor2:)
|
||||
/* keep the pre-permutation state in xmm8..xmm11 for the add after the rounds */
a2(vmovdqa xmm8,xmm0)
|
||||
a2(vmovdqa xmm9,xmm1)
|
||||
a2(vmovdqa xmm10,xmm2)
|
||||
a2(vmovdqa xmm11,xmm3)
|
||||
/* round counter: decremented by 2 per pass, so the inner loop body runs 4 times */
a2(mov rax,8)
|
||||
a1(scrypt_chacha_avx_loop: )
|
||||
/* first round of the pair; xmm12 is scratch for the shift-based rotates (12 and 7 bits) */
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vpshufb xmm3,xmm3,xmm4)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vpsrld xmm12,xmm1,20)
|
||||
a3(vpslld xmm1,xmm1,12)
|
||||
a3(vpxor xmm1,xmm1,xmm12)
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vpshufb xmm3,xmm3,xmm5)
|
||||
a3(vpshufd xmm0,xmm0,0x93)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpshufd xmm3,xmm3,0x4e)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vpshufd xmm2,xmm2,0x39)
|
||||
a3(vpsrld xmm12,xmm1,25)
|
||||
a3(vpslld xmm1,xmm1,7)
|
||||
a3(vpxor xmm1,xmm1,xmm12)
|
||||
/* second round of the pair; the 0x93 / 0x39 lane shuffles are swapped relative to the first */
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vpshufb xmm3,xmm3,xmm4)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vpsrld xmm12,xmm1,20)
|
||||
a3(vpslld xmm1,xmm1,12)
|
||||
a3(vpxor xmm1,xmm1,xmm12)
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vpshufb xmm3,xmm3,xmm5)
|
||||
a3(vpshufd xmm0,xmm0,0x39)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpshufd xmm3,xmm3,0x4e)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vpshufd xmm2,xmm2,0x93)
|
||||
a3(vpsrld xmm12,xmm1,25)
|
||||
a3(vpslld xmm1,xmm1,7)
|
||||
a3(vpxor xmm1,xmm1,xmm12)
|
||||
a2(sub rax,2)
|
||||
aj(ja scrypt_chacha_avx_loop)
|
||||
/* add the saved input state back onto the permuted state */
a3(vpaddd xmm0,xmm0,xmm8)
|
||||
a3(vpaddd xmm1,xmm1,xmm9)
|
||||
a3(vpaddd xmm2,xmm2,xmm10)
|
||||
a3(vpaddd xmm3,xmm3,xmm11)
|
||||
/*
 * Output address = rdi + (((r8 + r9) & ~0x7f) >> 1); r8 is toggled and r9 advanced
 * for the next pass. The cmp sets the flags used by the jne after the four stores.
 */
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_avx_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))

#define SCRYPT_CHACHA_AVX

/*
 * Intrinsic ChunkMix built on the ChaCha/8 core.
 *
 * The state starts as the last block of Bin (xored with the last block of
 * Bxor when Bxor is non-NULL). For every block index i in [0, 2r) the input
 * block(s) are xored into the state, eight rounds are applied, the
 * pre-round state is added back, and the result is written to Bout at block
 * index (i / 2) + half, where half alternates between 0 and r.
 * A block is accessed as four consecutive xmmi words.
 */
static void asm_calling_convention NOINLINE
scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	const uint32_t blocksPerChunk = r * 2;
	/* byte-shuffle masks used as 16-bit and 8-bit left rotates of each 32-bit lane */
	const xmmi rot16 = *(xmmi *)&ssse3_rotl16_32bit;
	const xmmi rot8 = *(xmmi *)&ssse3_rotl8_32bit;
	xmmi *blk;
	xmmi a, b, c, d;
	xmmi in_a, in_b, in_c, in_d;
	uint32_t i, half;
	size_t rounds;

	/* 1: X = B_{2r - 1} */
	blk = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
	a = blk[0];
	b = blk[1];
	c = blk[2];
	d = blk[3];

	if (Bxor) {
		blk = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
		a = _mm_xor_si128(a, blk[0]);
		b = _mm_xor_si128(b, blk[1]);
		c = _mm_xor_si128(c, blk[2]);
		d = _mm_xor_si128(d, blk[3]);
	}

	/* 2: for i = 0 to 2r - 1 do */
	for (i = 0, half = 0; i < blocksPerChunk; i++, half ^= r) {
		/* 3: X = H(X ^ B_i) */
		blk = (xmmi *)scrypt_block(Bin, i);
		a = _mm_xor_si128(a, blk[0]);
		b = _mm_xor_si128(b, blk[1]);
		c = _mm_xor_si128(c, blk[2]);
		d = _mm_xor_si128(d, blk[3]);

		if (Bxor) {
			blk = (xmmi *)scrypt_block(Bxor, i);
			a = _mm_xor_si128(a, blk[0]);
			b = _mm_xor_si128(b, blk[1]);
			c = _mm_xor_si128(c, blk[2]);
			d = _mm_xor_si128(d, blk[3]);
		}

		/* remember the input of the permutation for the final add */
		in_a = a;
		in_b = b;
		in_c = c;
		in_d = d;

		/* two rounds per pass, four passes */
		rounds = 8;
		do {
			/* first round of the pair */
			a = _mm_add_epi32(a, b);
			d = _mm_shuffle_epi8(_mm_xor_si128(d, a), rot16);
			c = _mm_add_epi32(c, d);
			b = _mm_xor_si128(b, c);
			b = _mm_or_si128(_mm_slli_epi32(b, 12), _mm_srli_epi32(b, 20));
			a = _mm_add_epi32(a, b);
			d = _mm_shuffle_epi8(_mm_xor_si128(d, a), rot8);
			a = _mm_shuffle_epi32(a, 0x93);
			c = _mm_add_epi32(c, d);
			d = _mm_shuffle_epi32(d, 0x4e);
			b = _mm_xor_si128(b, c);
			c = _mm_shuffle_epi32(c, 0x39);
			b = _mm_or_si128(_mm_slli_epi32(b, 7), _mm_srli_epi32(b, 25));

			/* second round of the pair: lane shuffles 0x93 / 0x39 swap roles */
			a = _mm_add_epi32(a, b);
			d = _mm_shuffle_epi8(_mm_xor_si128(d, a), rot16);
			c = _mm_add_epi32(c, d);
			b = _mm_xor_si128(b, c);
			b = _mm_or_si128(_mm_slli_epi32(b, 12), _mm_srli_epi32(b, 20));
			a = _mm_add_epi32(a, b);
			d = _mm_shuffle_epi8(_mm_xor_si128(d, a), rot8);
			a = _mm_shuffle_epi32(a, 0x39);
			c = _mm_add_epi32(c, d);
			d = _mm_shuffle_epi32(d, 0x4e);
			b = _mm_xor_si128(b, c);
			c = _mm_shuffle_epi32(c, 0x93);
			b = _mm_or_si128(_mm_slli_epi32(b, 7), _mm_srli_epi32(b, 25));

			rounds -= 2;
		} while (rounds);

		a = _mm_add_epi32(a, in_a);
		b = _mm_add_epi32(b, in_b);
		c = _mm_add_epi32(c, in_c);
		d = _mm_add_epi32(d, in_d);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		blk = (xmmi *)scrypt_block(Bout, (i / 2) + half);
		blk[0] = a;
		blk[1] = b;
		blk[2] = c;
		blk[3] = d;
	}
}

#endif

/* When any AVX variant above was compiled in, publish its name and mark a ChaCha mix as included. */
#if defined(SCRYPT_CHACHA_AVX)
	#undef SCRYPT_MIX
	#define SCRYPT_MIX "ChaCha/8-AVX"
	#undef SCRYPT_CHACHA_INCLUDED
	#define SCRYPT_CHACHA_INCLUDED
#endif
|
||||
|
|
@ -1,363 +0,0 @@
|
|||
/* x86 */
|
||||
#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_CHACHA_SSE2
|
||||
|
||||
/*
 * 32-bit x86 hand-written SSE2 ChunkMix built on the ChaCha/8 core.
 * After the four pushes the stack arguments are read from [ebp+20]..[ebp+32]:
 * edi = 1st, esi = 2nd, eax = 3rd, ebx = 4th
 * (presumably Bout, Bin, Bxor and r in prototype order - confirm against the
 * calling-convention macros, which are defined elsewhere).
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_sse2)
|
||||
a1(push ebx)
|
||||
a1(push edi)
|
||||
a1(push esi)
|
||||
a1(push ebp)
|
||||
a2(mov ebp,esp)
|
||||
a2(mov edi,[ebp+20])
|
||||
a2(mov esi,[ebp+24])
|
||||
a2(mov eax,[ebp+28])
|
||||
a2(mov ebx,[ebp+32])
|
||||
/* reserve a 16-byte aligned scratch slot at [esp+0]; it holds the saved xmm0 during the rounds */
a2(sub esp,16)
|
||||
a2(and esp,~15)
|
||||
/* edx = r*2*64 = chunk size in bytes; ecx = byte offset of the last 64-byte block */
a2(lea edx,[ebx*2])
|
||||
a2(shl edx,6)
|
||||
a2(lea ecx,[edx-64])
|
||||
/* sets ZF from eax for the jz below; the movdqa loads in between do not write the flags */
a2(and eax, eax)
|
||||
a2(movdqa xmm0,[ecx+esi+0])
|
||||
a2(movdqa xmm1,[ecx+esi+16])
|
||||
a2(movdqa xmm2,[ecx+esi+32])
|
||||
a2(movdqa xmm3,[ecx+esi+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor1)
|
||||
a2(pxor xmm0,[ecx+eax+0])
|
||||
a2(pxor xmm1,[ecx+eax+16])
|
||||
a2(pxor xmm2,[ecx+eax+32])
|
||||
a2(pxor xmm3,[ecx+eax+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor1:)
|
||||
/* ecx = byte offset of the current block; ebx alternates between 0 and edx on each pass */
a2(xor ecx,ecx)
|
||||
a2(xor ebx,ebx)
|
||||
a1(scrypt_ChunkMix_sse2_loop:)
|
||||
a2(and eax, eax)
|
||||
a2(pxor xmm0,[esi+ecx+0])
|
||||
a2(pxor xmm1,[esi+ecx+16])
|
||||
a2(pxor xmm2,[esi+ecx+32])
|
||||
a2(pxor xmm3,[esi+ecx+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor2)
|
||||
a2(pxor xmm0,[eax+ecx+0])
|
||||
a2(pxor xmm1,[eax+ecx+16])
|
||||
a2(pxor xmm2,[eax+ecx+32])
|
||||
a2(pxor xmm3,[eax+ecx+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor2:)
|
||||
/* save the pre-permutation state: xmm0 goes to the stack slot, xmm1..xmm3 to xmm4/xmm5/xmm7 (xmm6 is the rotate scratch) */
a2(movdqa [esp+0],xmm0)
|
||||
a2(movdqa xmm4,xmm1)
|
||||
a2(movdqa xmm5,xmm2)
|
||||
a2(movdqa xmm7,xmm3)
|
||||
/* eax is reused as the round counter (8, stepping by 2) */
a2(mov eax,8)
|
||||
a1(scrypt_chacha_sse2_loop: )
|
||||
/* first round of the pair; pshuflw+pshufhw with 0xb1 swap the 16-bit halves of each dword (rotate by 16) */
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a3(pshuflw xmm3,xmm3,0xb1)
|
||||
a3(pshufhw xmm3,xmm3,0xb1)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,12)
|
||||
a2(psrld xmm6,20)
|
||||
a2(pxor xmm1,xmm6)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(movdqa xmm6,xmm3)
|
||||
a2(pslld xmm3,8)
|
||||
a2(psrld xmm6,24)
|
||||
a2(pxor xmm3,xmm6)
|
||||
a3(pshufd xmm0,xmm0,0x93)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a3(pshufd xmm2,xmm2,0x39)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,7)
|
||||
a2(psrld xmm6,25)
|
||||
a2(pxor xmm1,xmm6)
|
||||
/* counter update placed mid-loop: the SSE instructions that follow do not write EFLAGS, so the ja at the end still sees this result */
a2(sub eax,2)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a3(pshuflw xmm3,xmm3,0xb1)
|
||||
a3(pshufhw xmm3,xmm3,0xb1)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,12)
|
||||
a2(psrld xmm6,20)
|
||||
a2(pxor xmm1,xmm6)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(movdqa xmm6,xmm3)
|
||||
a2(pslld xmm3,8)
|
||||
a2(psrld xmm6,24)
|
||||
a2(pxor xmm3,xmm6)
|
||||
a3(pshufd xmm0,xmm0,0x39)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a3(pshufd xmm2,xmm2,0x93)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,7)
|
||||
a2(psrld xmm6,25)
|
||||
a2(pxor xmm1,xmm6)
|
||||
aj(ja scrypt_chacha_sse2_loop)
|
||||
/* add the saved input state back onto the permuted state */
a2(paddd xmm0,[esp+0])
|
||||
a2(paddd xmm1,xmm4)
|
||||
a2(paddd xmm2,xmm5)
|
||||
a2(paddd xmm3,xmm7)
|
||||
/* output address = edi + (((ebx + ecx) & ~0x7f) >> 1); cmp sets the flags consumed by jne after the stores and the mov */
a2(lea eax,[ebx+ecx])
|
||||
a2(xor ebx,edx)
|
||||
a2(and eax,~0x7f)
|
||||
a2(add ecx,64)
|
||||
a2(shr eax,1)
|
||||
a2(add eax, edi)
|
||||
a2(cmp ecx,edx)
|
||||
a2(movdqa [eax+0],xmm0)
|
||||
a2(movdqa [eax+16],xmm1)
|
||||
a2(movdqa [eax+32],xmm2)
|
||||
a2(movdqa [eax+48],xmm3)
|
||||
/* eax was clobbered as counter and output pointer: reload the third argument for the next pass */
a2(mov eax,[ebp+28])
|
||||
aj(jne scrypt_ChunkMix_sse2_loop)
|
||||
a2(mov esp,ebp)
|
||||
a1(pop ebp)
|
||||
a1(pop esi)
|
||||
a1(pop edi)
|
||||
a1(pop ebx)
|
||||
/* NOTE(review): aret(16) presumably returns and releases the 16 bytes of stack arguments - macro defined elsewhere, confirm */
aret(16)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_sse2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_CHACHA_SSE2
|
||||
|
||||
/*
 * x86-64 hand-written SSE2 ChunkMix built on the ChaCha/8 core.
 * Register use as read from the body: rdi = output base, rsi = input base,
 * rdx = optional xor input (skipped when zero), ecx = r.
 * NOTE(review): the argument-to-register mapping is presumably arranged by
 * asm_naked_fn / the calling-convention macros, which are defined elsewhere - confirm.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_sse2)
|
||||
/* rcx = r*2, then *64 below: total chunk size in bytes (64 bytes are processed per block) */
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
/* r9 = byte offset of the last block; rax / r9 then point at that block in the input and xor buffers */
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* sets ZF from rdx for the jz below; the movdqa loads in between do not write the flags */
a2(and rdx, rdx)
|
||||
a2(movdqa xmm0,[rax+0])
|
||||
a2(movdqa xmm1,[rax+16])
|
||||
a2(movdqa xmm2,[rax+32])
|
||||
a2(movdqa xmm3,[rax+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor1)
|
||||
a2(pxor xmm0,[r9+0])
|
||||
a2(pxor xmm1,[r9+16])
|
||||
a2(pxor xmm2,[r9+32])
|
||||
a2(pxor xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor1:)
|
||||
/* r9 = byte offset of the current block; r8 alternates between 0 and rcx on each pass */
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_sse2_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a2(pxor xmm0,[rsi+r9+0])
|
||||
a2(pxor xmm1,[rsi+r9+16])
|
||||
a2(pxor xmm2,[rsi+r9+32])
|
||||
a2(pxor xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor2)
|
||||
a2(pxor xmm0,[rdx+r9+0])
|
||||
a2(pxor xmm1,[rdx+r9+16])
|
||||
a2(pxor xmm2,[rdx+r9+32])
|
||||
a2(pxor xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor2:)
|
||||
/* keep the pre-permutation state in xmm8..xmm11 for the add after the rounds */
a2(movdqa xmm8,xmm0)
|
||||
a2(movdqa xmm9,xmm1)
|
||||
a2(movdqa xmm10,xmm2)
|
||||
a2(movdqa xmm11,xmm3)
|
||||
/* round counter: 8, stepping by 2 */
a2(mov rax,8)
|
||||
a1(scrypt_chacha_sse2_loop: )
|
||||
/* first round of the pair; xmm6 is scratch for the shift-based rotates, pshuflw+pshufhw 0xb1 rotate each dword by 16 */
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a3(pshuflw xmm3,xmm3,0xb1)
|
||||
a3(pshufhw xmm3,xmm3,0xb1)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,12)
|
||||
a2(psrld xmm6,20)
|
||||
a2(pxor xmm1,xmm6)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(movdqa xmm6,xmm3)
|
||||
a2(pslld xmm3,8)
|
||||
a2(psrld xmm6,24)
|
||||
a2(pxor xmm3,xmm6)
|
||||
a3(pshufd xmm0,xmm0,0x93)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a3(pshufd xmm2,xmm2,0x39)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,7)
|
||||
a2(psrld xmm6,25)
|
||||
a2(pxor xmm1,xmm6)
|
||||
/* counter update placed mid-loop: the SSE instructions that follow do not write the flags, so the ja at the end still sees this result */
a2(sub rax,2)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a3(pshuflw xmm3,xmm3,0xb1)
|
||||
a3(pshufhw xmm3,xmm3,0xb1)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,12)
|
||||
a2(psrld xmm6,20)
|
||||
a2(pxor xmm1,xmm6)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(movdqa xmm6,xmm3)
|
||||
a2(pslld xmm3,8)
|
||||
a2(psrld xmm6,24)
|
||||
a2(pxor xmm3,xmm6)
|
||||
a3(pshufd xmm0,xmm0,0x39)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a3(pshufd xmm2,xmm2,0x93)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,7)
|
||||
a2(psrld xmm6,25)
|
||||
a2(pxor xmm1,xmm6)
|
||||
aj(ja scrypt_chacha_sse2_loop)
|
||||
/* add the saved input state back onto the permuted state */
a2(paddd xmm0,xmm8)
|
||||
a2(paddd xmm1,xmm9)
|
||||
a2(paddd xmm2,xmm10)
|
||||
a2(paddd xmm3,xmm11)
|
||||
/* output address = rdi + (((r8 + r9) & ~0x7f) >> 1); cmp sets the flags consumed by jne after the four stores */
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(movdqa [rax+0],xmm0)
|
||||
a2(movdqa [rax+16],xmm1)
|
||||
a2(movdqa [rax+32],xmm2)
|
||||
a2(movdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_sse2_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_sse2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))

#define SCRYPT_CHACHA_SSE2

/*
 * Intrinsic SSE2 ChunkMix built on the ChaCha/8 core.
 *
 * The state starts as the last block of Bin (xored with the last block of
 * Bxor when Bxor is non-NULL). For every block index i in [0, 2r) the input
 * block(s) are xored into the state, eight rounds are applied, the
 * pre-round state is added back, and the result is written to Bout at block
 * index (i / 2) + half, where half alternates between 0 and r.
 * A block is accessed as four consecutive xmmi words.
 *
 * Rotates: 16 bits via a 16-bit word swap (shufflelo/shufflehi 0xb1); 12, 8
 * and 7 bits via shift-left | shift-right. The former scratch variable and
 * its dead stores ahead of the 16-bit rotate were removed; results are
 * unchanged.
 */
static void NOINLINE asm_calling_convention
scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	uint32_t i, blocksPerChunk = r * 2, half = 0;
	xmmi *xmmp,x0,x1,x2,x3,t0,t1,t2,t3;
	size_t rounds;

	/* 1: X = B_{2r - 1} */
	xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
	x0 = xmmp[0];
	x1 = xmmp[1];
	x2 = xmmp[2];
	x3 = xmmp[3];

	if (Bxor) {
		xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
		x0 = _mm_xor_si128(x0, xmmp[0]);
		x1 = _mm_xor_si128(x1, xmmp[1]);
		x2 = _mm_xor_si128(x2, xmmp[2]);
		x3 = _mm_xor_si128(x3, xmmp[3]);
	}

	/* 2: for i = 0 to 2r - 1 do */
	for (i = 0; i < blocksPerChunk; i++, half ^= r) {
		/* 3: X = H(X ^ B_i) */
		xmmp = (xmmi *)scrypt_block(Bin, i);
		x0 = _mm_xor_si128(x0, xmmp[0]);
		x1 = _mm_xor_si128(x1, xmmp[1]);
		x2 = _mm_xor_si128(x2, xmmp[2]);
		x3 = _mm_xor_si128(x3, xmmp[3]);

		if (Bxor) {
			xmmp = (xmmi *)scrypt_block(Bxor, i);
			x0 = _mm_xor_si128(x0, xmmp[0]);
			x1 = _mm_xor_si128(x1, xmmp[1]);
			x2 = _mm_xor_si128(x2, xmmp[2]);
			x3 = _mm_xor_si128(x3, xmmp[3]);
		}

		/* keep the permutation input for the add after the rounds */
		t0 = x0;
		t1 = x1;
		t2 = x2;
		t3 = x3;

		/* two rounds per pass, four passes */
		for (rounds = 8; rounds; rounds -= 2) {
			/* first round of the pair */
			x0 = _mm_add_epi32(x0, x1);
			x3 = _mm_xor_si128(x3, x0);
			x3 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(x3, 0xb1), 0xb1);
			x2 = _mm_add_epi32(x2, x3);
			x1 = _mm_xor_si128(x1, x2);
			x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x1, 20));
			x0 = _mm_add_epi32(x0, x1);
			x3 = _mm_xor_si128(x3, x0);
			x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x3, 24));
			x0 = _mm_shuffle_epi32(x0, 0x93);
			x2 = _mm_add_epi32(x2, x3);
			x3 = _mm_shuffle_epi32(x3, 0x4e);
			x1 = _mm_xor_si128(x1, x2);
			x2 = _mm_shuffle_epi32(x2, 0x39);
			x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x1, 25));

			/* second round of the pair: lane shuffles 0x93 / 0x39 swap roles */
			x0 = _mm_add_epi32(x0, x1);
			x3 = _mm_xor_si128(x3, x0);
			x3 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(x3, 0xb1), 0xb1);
			x2 = _mm_add_epi32(x2, x3);
			x1 = _mm_xor_si128(x1, x2);
			x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x1, 20));
			x0 = _mm_add_epi32(x0, x1);
			x3 = _mm_xor_si128(x3, x0);
			x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x3, 24));
			x0 = _mm_shuffle_epi32(x0, 0x39);
			x2 = _mm_add_epi32(x2, x3);
			x3 = _mm_shuffle_epi32(x3, 0x4e);
			x1 = _mm_xor_si128(x1, x2);
			x2 = _mm_shuffle_epi32(x2, 0x93);
			x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x1, 25));
		}

		x0 = _mm_add_epi32(x0, t0);
		x1 = _mm_add_epi32(x1, t1);
		x2 = _mm_add_epi32(x2, t2);
		x3 = _mm_add_epi32(x3, t3);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
		xmmp[0] = x0;
		xmmp[1] = x1;
		xmmp[2] = x2;
		xmmp[3] = x3;
	}
}

#endif

/* When any SSE2 variant above was compiled in, publish its name and mark a ChaCha mix as included. */
#if defined(SCRYPT_CHACHA_SSE2)
	#undef SCRYPT_MIX
	#define SCRYPT_MIX "ChaCha/8-SSE2"
	#undef SCRYPT_CHACHA_INCLUDED
	#define SCRYPT_CHACHA_INCLUDED
#endif
|
||||
|
|
@ -1,376 +0,0 @@
|
|||
/* x86 */
|
||||
#if defined(X86ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_CHACHA_SSSE3
|
||||
|
||||
/*
 * 32-bit x86 hand-written SSSE3 ChunkMix built on the ChaCha/8 core.
 * After the four pushes the stack arguments are read from [ebp+20]..[ebp+32]:
 * edi = 1st, esi = 2nd, eax = 3rd, ebx = 4th
 * (presumably Bout, Bin, Bxor and r in prototype order - confirm against the
 * calling-convention macros, which are defined elsewhere).
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_ssse3)
|
||||
a1(push ebx)
|
||||
a1(push edi)
|
||||
a1(push esi)
|
||||
a1(push ebp)
|
||||
a2(mov ebp,esp)
|
||||
a2(mov edi,[ebp+20])
|
||||
a2(mov esi,[ebp+24])
|
||||
a2(mov eax,[ebp+28])
|
||||
a2(mov ebx,[ebp+32])
|
||||
/* reserve a 64-byte aligned scratch area; [esp+0], [esp+16] and [esp+32] hold saved state during the rounds */
a2(sub esp,64)
|
||||
a2(and esp,~63)
|
||||
/* edx = r*2*64 = chunk size in bytes; ecx = byte offset of the last 64-byte block */
a2(lea edx,[ebx*2])
|
||||
a2(shl edx,6)
|
||||
a2(lea ecx,[edx-64])
|
||||
/* sets ZF from eax for the jz further down; the mov/movd/punpck/movdqa instructions in between do not write the flags */
a2(and eax, eax)
|
||||
/*
 * Assemble two pshufb masks one dword at a time (ebx is free here, r was already folded into edx):
 * xmm4 rotates every 32-bit lane left by 16 bits, xmm5 rotates it left by 8.
 */
a2(mov ebx, 0x01000302)
|
||||
a2(movd xmm4, ebx)
|
||||
a2(mov ebx, 0x05040706)
|
||||
a2(movd xmm0, ebx)
|
||||
a2(mov ebx, 0x09080b0a)
|
||||
a2(movd xmm1, ebx)
|
||||
a2(mov ebx, 0x0d0c0f0e)
|
||||
a2(movd xmm2, ebx)
|
||||
a2(mov ebx, 0x02010003)
|
||||
a2(movd xmm5, ebx)
|
||||
a2(mov ebx, 0x06050407)
|
||||
a2(movd xmm3, ebx)
|
||||
a2(mov ebx, 0x0a09080b)
|
||||
a2(movd xmm6, ebx)
|
||||
a2(mov ebx, 0x0e0d0c0f)
|
||||
a2(movd xmm7, ebx)
|
||||
a2(punpckldq xmm4, xmm0)
|
||||
a2(punpckldq xmm5, xmm3)
|
||||
a2(punpckldq xmm1, xmm2)
|
||||
a2(punpckldq xmm6, xmm7)
|
||||
a2(punpcklqdq xmm4, xmm1)
|
||||
a2(punpcklqdq xmm5, xmm6)
|
||||
a2(movdqa xmm0,[ecx+esi+0])
|
||||
a2(movdqa xmm1,[ecx+esi+16])
|
||||
a2(movdqa xmm2,[ecx+esi+32])
|
||||
a2(movdqa xmm3,[ecx+esi+48])
|
||||
aj(jz scrypt_ChunkMix_ssse3_no_xor1)
|
||||
a2(pxor xmm0,[ecx+eax+0])
|
||||
a2(pxor xmm1,[ecx+eax+16])
|
||||
a2(pxor xmm2,[ecx+eax+32])
|
||||
a2(pxor xmm3,[ecx+eax+48])
|
||||
a1(scrypt_ChunkMix_ssse3_no_xor1:)
|
||||
/* ecx = byte offset of the current block; ebx alternates between 0 and edx on each pass */
a2(xor ecx,ecx)
|
||||
a2(xor ebx,ebx)
|
||||
a1(scrypt_ChunkMix_ssse3_loop:)
|
||||
a2(and eax, eax)
|
||||
a2(pxor xmm0,[esi+ecx+0])
|
||||
a2(pxor xmm1,[esi+ecx+16])
|
||||
a2(pxor xmm2,[esi+ecx+32])
|
||||
a2(pxor xmm3,[esi+ecx+48])
|
||||
aj(jz scrypt_ChunkMix_ssse3_no_xor2)
|
||||
a2(pxor xmm0,[eax+ecx+0])
|
||||
a2(pxor xmm1,[eax+ecx+16])
|
||||
a2(pxor xmm2,[eax+ecx+32])
|
||||
a2(pxor xmm3,[eax+ecx+48])
|
||||
a1(scrypt_ChunkMix_ssse3_no_xor2:)
|
||||
/* save the pre-permutation state: xmm0..xmm2 to the stack, xmm3 to xmm7 (xmm4/xmm5 hold the masks, xmm6 is scratch) */
a2(movdqa [esp+0],xmm0)
|
||||
a2(movdqa [esp+16],xmm1)
|
||||
a2(movdqa [esp+32],xmm2)
|
||||
a2(movdqa xmm7,xmm3)
|
||||
/* eax is reused as the round counter (8, stepping by 2) */
a2(mov eax,8)
|
||||
a1(scrypt_chacha_ssse3_loop: )
|
||||
/* first round of the pair */
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(pshufb xmm3,xmm4)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,12)
|
||||
a2(psrld xmm6,20)
|
||||
a2(pxor xmm1,xmm6)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(pshufb xmm3,xmm5)
|
||||
a3(pshufd xmm0,xmm0,0x93)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a3(pshufd xmm2,xmm2,0x39)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,7)
|
||||
a2(psrld xmm6,25)
|
||||
a2(pxor xmm1,xmm6)
|
||||
/* counter update placed mid-loop: the SSE instructions that follow do not write EFLAGS, so the ja at the end still sees this result */
a2(sub eax,2)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(pshufb xmm3,xmm4)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,12)
|
||||
a2(psrld xmm6,20)
|
||||
a2(pxor xmm1,xmm6)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(pshufb xmm3,xmm5)
|
||||
a3(pshufd xmm0,xmm0,0x39)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a3(pshufd xmm2,xmm2,0x93)
|
||||
a2(movdqa xmm6,xmm1)
|
||||
a2(pslld xmm1,7)
|
||||
a2(psrld xmm6,25)
|
||||
a2(pxor xmm1,xmm6)
|
||||
aj(ja scrypt_chacha_ssse3_loop)
|
||||
/* add the saved input state back onto the permuted state */
a2(paddd xmm0,[esp+0])
|
||||
a2(paddd xmm1,[esp+16])
|
||||
a2(paddd xmm2,[esp+32])
|
||||
a2(paddd xmm3,xmm7)
|
||||
/* output address = edi + (((ebx + ecx) & ~0x7f) >> 1); cmp sets the flags consumed by jne after the stores and the mov */
a2(lea eax,[ebx+ecx])
|
||||
a2(xor ebx,edx)
|
||||
a2(and eax,~0x7f)
|
||||
a2(add ecx,64)
|
||||
a2(shr eax,1)
|
||||
a2(add eax, edi)
|
||||
a2(cmp ecx,edx)
|
||||
a2(movdqa [eax+0],xmm0)
|
||||
a2(movdqa [eax+16],xmm1)
|
||||
a2(movdqa [eax+32],xmm2)
|
||||
a2(movdqa [eax+48],xmm3)
|
||||
/* eax was clobbered as counter and output pointer: reload the third argument for the next pass */
a2(mov eax,[ebp+28])
|
||||
aj(jne scrypt_ChunkMix_ssse3_loop)
|
||||
a2(mov esp,ebp)
|
||||
a1(pop ebp)
|
||||
a1(pop esi)
|
||||
a1(pop edi)
|
||||
a1(pop ebx)
|
||||
/* NOTE(review): aret(16) presumably returns and releases the 16 bytes of stack arguments - macro defined elsewhere, confirm */
aret(16)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_ssse3)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_CHACHA_SSSE3
|
||||
|
||||
/*
 * x86-64 hand-written SSSE3 ChunkMix built on the ChaCha/8 core.
 * Register use as read from the body: rdi = output base, rsi = input base,
 * rdx = optional xor input (skipped when zero), ecx = r.
 * NOTE(review): the argument-to-register mapping is presumably arranged by
 * asm_naked_fn / the calling-convention macros, which are defined elsewhere - confirm.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_ssse3)
|
||||
/* rcx = r*2, then *64 below: total chunk size in bytes (64 bytes are processed per block) */
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
/* r9 = byte offset of the last block; rax / r9 then point at that block in the input and xor buffers */
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* sets ZF from rdx for the jz further down; none of the loads/moves in between write the flags */
a2(and rdx, rdx)
|
||||
a2(movdqa xmm0,[rax+0])
|
||||
a2(movdqa xmm1,[rax+16])
|
||||
a2(movdqa xmm2,[rax+32])
|
||||
a2(movdqa xmm3,[rax+48])
|
||||
/*
 * Build two pshufb masks (low qword from r8, high qword from rax):
 * xmm4 rotates every 32-bit lane left by 16 bits, xmm5 rotates it left by 8.
 * NOTE(review): movd with a 64-bit GPR relies on the assembler emitting the
 * 64-bit (movq) form - confirm for the target toolchain.
 */
a2(mov r8, 0x0504070601000302)
|
||||
a2(mov rax, 0x0d0c0f0e09080b0a)
|
||||
a2(movd xmm4, r8)
|
||||
a2(movd xmm6, rax)
|
||||
a2(mov r8, 0x0605040702010003)
|
||||
a2(mov rax, 0x0e0d0c0f0a09080b)
|
||||
a2(movd xmm5, r8)
|
||||
a2(movd xmm7, rax)
|
||||
a2(punpcklqdq xmm4, xmm6)
|
||||
a2(punpcklqdq xmm5, xmm7)
|
||||
aj(jz scrypt_ChunkMix_ssse3_no_xor1)
|
||||
a2(pxor xmm0,[r9+0])
|
||||
a2(pxor xmm1,[r9+16])
|
||||
a2(pxor xmm2,[r9+32])
|
||||
a2(pxor xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_ssse3_no_xor1:)
|
||||
/* r9 = byte offset of the current block; r8 alternates between 0 and rcx on each pass */
a2(xor r8,r8)
|
||||
a2(xor r9,r9)
|
||||
a1(scrypt_ChunkMix_ssse3_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a2(pxor xmm0,[rsi+r9+0])
|
||||
a2(pxor xmm1,[rsi+r9+16])
|
||||
a2(pxor xmm2,[rsi+r9+32])
|
||||
a2(pxor xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_ssse3_no_xor2)
|
||||
a2(pxor xmm0,[rdx+r9+0])
|
||||
a2(pxor xmm1,[rdx+r9+16])
|
||||
a2(pxor xmm2,[rdx+r9+32])
|
||||
a2(pxor xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_ssse3_no_xor2:)
|
||||
/* keep the pre-permutation state in xmm8..xmm11 for the add after the rounds */
a2(movdqa xmm8,xmm0)
|
||||
a2(movdqa xmm9,xmm1)
|
||||
a2(movdqa xmm10,xmm2)
|
||||
a2(movdqa xmm11,xmm3)
|
||||
/* round counter: 8, stepping by 2 */
a2(mov rax,8)
|
||||
a1(scrypt_chacha_ssse3_loop: )
|
||||
/* first round of the pair; xmm12 is scratch for the shift-based rotates (12 and 7 bits) */
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(pshufb xmm3,xmm4)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a2(movdqa xmm12,xmm1)
|
||||
a2(pslld xmm1,12)
|
||||
a2(psrld xmm12,20)
|
||||
a2(pxor xmm1,xmm12)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(pshufb xmm3,xmm5)
|
||||
a3(pshufd xmm0,xmm0,0x93)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a3(pshufd xmm2,xmm2,0x39)
|
||||
a2(movdqa xmm12,xmm1)
|
||||
a2(pslld xmm1,7)
|
||||
a2(psrld xmm12,25)
|
||||
a2(pxor xmm1,xmm12)
|
||||
/* counter update placed mid-loop: the SSE instructions that follow do not write the flags, so the ja at the end still sees this result */
a2(sub rax,2)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(pshufb xmm3,xmm4)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a2(movdqa xmm12,xmm1)
|
||||
a2(pslld xmm1,12)
|
||||
a2(psrld xmm12,20)
|
||||
a2(pxor xmm1,xmm12)
|
||||
a2(paddd xmm0,xmm1)
|
||||
a2(pxor xmm3,xmm0)
|
||||
a2(pshufb xmm3,xmm5)
|
||||
a3(pshufd xmm0,xmm0,0x39)
|
||||
a2(paddd xmm2,xmm3)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a2(pxor xmm1,xmm2)
|
||||
a3(pshufd xmm2,xmm2,0x93)
|
||||
a2(movdqa xmm12,xmm1)
|
||||
a2(pslld xmm1,7)
|
||||
a2(psrld xmm12,25)
|
||||
a2(pxor xmm1,xmm12)
|
||||
aj(ja scrypt_chacha_ssse3_loop)
|
||||
/* add the saved input state back onto the permuted state */
a2(paddd xmm0,xmm8)
|
||||
a2(paddd xmm1,xmm9)
|
||||
a2(paddd xmm2,xmm10)
|
||||
a2(paddd xmm3,xmm11)
|
||||
/* output address = rdi + (((r8 + r9) & ~0x7f) >> 1); cmp sets the flags consumed by jne after the four stores */
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(movdqa [rax+0],xmm0)
|
||||
a2(movdqa [rax+16],xmm1)
|
||||
a2(movdqa [rax+32],xmm2)
|
||||
a2(movdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_ssse3_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_ssse3)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))

#define SCRYPT_CHACHA_SSSE3

/*
 * Intrinsic SSSE3 ChunkMix built on the ChaCha/8 core.
 *
 * The state starts as the last block of Bin (xored with the last block of
 * Bxor when Bxor is non-NULL). For every block index i in [0, 2r) the input
 * block(s) are xored into the state, eight rounds are applied, the
 * pre-round state is added back, and the result is written to Bout at block
 * index (i / 2) + half, where half alternates between 0 and r.
 * A block is accessed as four consecutive xmmi words.
 */
static void NOINLINE asm_calling_convention
scrypt_ChunkMix_ssse3(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	const uint32_t blocksPerChunk = r * 2;
	/* byte-shuffle masks used as 16-bit and 8-bit left rotates of each 32-bit lane */
	const xmmi rot16 = *(xmmi *)&ssse3_rotl16_32bit;
	const xmmi rot8 = *(xmmi *)&ssse3_rotl8_32bit;
	xmmi *blk;
	xmmi a, b, c, d;
	xmmi in_a, in_b, in_c, in_d;
	uint32_t i, half;
	size_t rounds;

	/* 1: X = B_{2r - 1} */
	blk = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
	a = blk[0];
	b = blk[1];
	c = blk[2];
	d = blk[3];

	if (Bxor) {
		blk = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
		a = _mm_xor_si128(a, blk[0]);
		b = _mm_xor_si128(b, blk[1]);
		c = _mm_xor_si128(c, blk[2]);
		d = _mm_xor_si128(d, blk[3]);
	}

	/* 2: for i = 0 to 2r - 1 do */
	for (i = 0, half = 0; i < blocksPerChunk; i++, half ^= r) {
		/* 3: X = H(X ^ B_i) */
		blk = (xmmi *)scrypt_block(Bin, i);
		a = _mm_xor_si128(a, blk[0]);
		b = _mm_xor_si128(b, blk[1]);
		c = _mm_xor_si128(c, blk[2]);
		d = _mm_xor_si128(d, blk[3]);

		if (Bxor) {
			blk = (xmmi *)scrypt_block(Bxor, i);
			a = _mm_xor_si128(a, blk[0]);
			b = _mm_xor_si128(b, blk[1]);
			c = _mm_xor_si128(c, blk[2]);
			d = _mm_xor_si128(d, blk[3]);
		}

		/* remember the input of the permutation for the final add */
		in_a = a;
		in_b = b;
		in_c = c;
		in_d = d;

		/* two rounds per pass, four passes */
		rounds = 8;
		do {
			/* first round of the pair */
			a = _mm_add_epi32(a, b);
			d = _mm_shuffle_epi8(_mm_xor_si128(d, a), rot16);
			c = _mm_add_epi32(c, d);
			b = _mm_xor_si128(b, c);
			b = _mm_or_si128(_mm_slli_epi32(b, 12), _mm_srli_epi32(b, 20));
			a = _mm_add_epi32(a, b);
			d = _mm_shuffle_epi8(_mm_xor_si128(d, a), rot8);
			a = _mm_shuffle_epi32(a, 0x93);
			c = _mm_add_epi32(c, d);
			d = _mm_shuffle_epi32(d, 0x4e);
			b = _mm_xor_si128(b, c);
			c = _mm_shuffle_epi32(c, 0x39);
			b = _mm_or_si128(_mm_slli_epi32(b, 7), _mm_srli_epi32(b, 25));

			/* second round of the pair: lane shuffles 0x93 / 0x39 swap roles */
			a = _mm_add_epi32(a, b);
			d = _mm_shuffle_epi8(_mm_xor_si128(d, a), rot16);
			c = _mm_add_epi32(c, d);
			b = _mm_xor_si128(b, c);
			b = _mm_or_si128(_mm_slli_epi32(b, 12), _mm_srli_epi32(b, 20));
			a = _mm_add_epi32(a, b);
			d = _mm_shuffle_epi8(_mm_xor_si128(d, a), rot8);
			a = _mm_shuffle_epi32(a, 0x39);
			c = _mm_add_epi32(c, d);
			d = _mm_shuffle_epi32(d, 0x4e);
			b = _mm_xor_si128(b, c);
			c = _mm_shuffle_epi32(c, 0x93);
			b = _mm_or_si128(_mm_slli_epi32(b, 7), _mm_srli_epi32(b, 25));

			rounds -= 2;
		} while (rounds);

		a = _mm_add_epi32(a, in_a);
		b = _mm_add_epi32(b, in_b);
		c = _mm_add_epi32(c, in_c);
		d = _mm_add_epi32(d, in_d);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		blk = (xmmi *)scrypt_block(Bout, (i / 2) + half);
		blk[0] = a;
		blk[1] = b;
		blk[2] = c;
		blk[3] = d;
	}
}

#endif

/* When any SSSE3 variant above was compiled in, publish its name and mark a ChaCha mix as included. */
#if defined(SCRYPT_CHACHA_SSSE3)
	#undef SCRYPT_MIX
	#define SCRYPT_MIX "ChaCha/8-SSSE3"
	#undef SCRYPT_CHACHA_INCLUDED
	#define SCRYPT_CHACHA_INCLUDED
#endif
|
||||
315
vendor/scrypt-jane/code/scrypt-jane-mix_chacha-xop.h
vendored
315
vendor/scrypt-jane/code/scrypt-jane-mix_chacha-xop.h
vendored
|
|
@ -1,315 +0,0 @@
|
|||
/* x86 */
|
||||
/*
 * scrypt_ChunkMix_xop, 32-bit x86 assembly.
 *
 * Mixes a chunk of 2*r 64-byte blocks with the ChaCha/8 core, using the XOP
 * vprotd instruction for the 32-bit rotates.
 *
 *   Bout - destination chunk
 *   Bin  - source chunk
 *   Bxor - optional second source XORed into every block that is read;
 *          skipped when NULL
 *   r    - the chunk holds 2*r blocks of 64 bytes
 *
 * The four arguments are read from the stack ([ebp+20]..[ebp+32] after the
 * four pushes) and the routine leaves through aret(16), presumably a
 * callee-cleanup return -- confirm against the asm_naked_fn/aret macros.
 * All block loads/stores use vmovdqa, so the buffers must be 16-byte aligned.
 *
 * Output placement matches the intrinsic version: block i is stored at index
 * (i / 2) for even i and (i / 2) + r for odd i.
 *
 * The final two shuffles of the round loop are VEX-encoded (vpshufd) like
 * every other vector instruction here, so no legacy-SSE encoding is mixed
 * into the VEX/XOP sequence. The xmm results are identical.
 */
#if defined(X86ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_XOP

asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_xop)
	/* prologue: save callee-saved registers, fetch arguments */
	a1(push ebx)
	a1(push edi)
	a1(push esi)
	a1(push ebp)
	a2(mov ebp,esp)
	a2(mov edi,[ebp+20])        /* edi = Bout */
	a2(mov esi,[ebp+24])        /* esi = Bin */
	a2(mov eax,[ebp+28])        /* eax = Bxor */
	a2(mov ebx,[ebp+32])        /* ebx = r */
	a2(sub esp,64)
	a2(and esp,~63)
	a2(lea edx,[ebx*2])
	a2(shl edx,6)               /* edx = 2*r*64 = chunk size in bytes */
	a2(lea ecx,[edx-64])        /* ecx = byte offset of the last block */
	a2(and eax, eax)            /* ZF set when Bxor is NULL; consumed by the jz below */
	/* X = last block of Bin (xor last block of Bxor when present) */
	a2(vmovdqa xmm0,[ecx+esi+0])
	a2(vmovdqa xmm1,[ecx+esi+16])
	a2(vmovdqa xmm2,[ecx+esi+32])
	a2(vmovdqa xmm3,[ecx+esi+48])
	aj(jz scrypt_ChunkMix_xop_no_xor1)
	a3(vpxor xmm0,xmm0,[ecx+eax+0])
	a3(vpxor xmm1,xmm1,[ecx+eax+16])
	a3(vpxor xmm2,xmm2,[ecx+eax+32])
	a3(vpxor xmm3,xmm3,[ecx+eax+48])
	a1(scrypt_ChunkMix_xop_no_xor1:)
	a2(xor ecx,ecx)             /* ecx = byte offset of the current block */
	a2(xor ebx,ebx)             /* ebx toggles between 0 and edx each block */
	a1(scrypt_ChunkMix_xop_loop:)
		/* X ^= Bin[i] (and Bxor[i] when present) */
		a2(and eax, eax)
		a3(vpxor xmm0,xmm0,[esi+ecx+0])
		a3(vpxor xmm1,xmm1,[esi+ecx+16])
		a3(vpxor xmm2,xmm2,[esi+ecx+32])
		a3(vpxor xmm3,xmm3,[esi+ecx+48])
		aj(jz scrypt_ChunkMix_xop_no_xor2)
		a3(vpxor xmm0,xmm0,[eax+ecx+0])
		a3(vpxor xmm1,xmm1,[eax+ecx+16])
		a3(vpxor xmm2,xmm2,[eax+ecx+32])
		a3(vpxor xmm3,xmm3,[eax+ecx+48])
		a1(scrypt_ChunkMix_xop_no_xor2:)
		/* keep a copy of the input for the feed-forward add */
		a2(vmovdqa xmm4,xmm0)
		a2(vmovdqa xmm5,xmm1)
		a2(vmovdqa xmm6,xmm2)
		a2(vmovdqa xmm7,xmm3)
		a2(mov eax,8)               /* round counter; two rounds per iteration */
		a1(scrypt_chacha_xop_loop: )
			a3(vpaddd xmm0,xmm0,xmm1)
			a3(vpxor  xmm3,xmm3,xmm0)
			a3(vprotd xmm3,xmm3,16)
			a3(vpaddd xmm2,xmm2,xmm3)
			a3(vpxor  xmm1,xmm1,xmm2)
			a3(vprotd xmm1,xmm1,12)
			a3(vpaddd xmm0,xmm0,xmm1)
			a3(vpxor  xmm3,xmm3,xmm0)
			a3(vprotd xmm3,xmm3,8)
			a3(vpaddd xmm2,xmm2,xmm3)
			a3(vpshufd xmm0,xmm0,0x93)
			a3(vpxor  xmm1,xmm1,xmm2)
			a3(vprotd xmm1,xmm1,7)
			a3(vpshufd xmm3,xmm3,0x4e)
			a3(vpaddd xmm0,xmm0,xmm1)
			a3(vpshufd xmm2,xmm2,0x39)
			a3(vpxor  xmm3,xmm3,xmm0)
			a3(vprotd xmm3,xmm3,16)
			a3(vpaddd xmm2,xmm2,xmm3)
			a3(vpxor  xmm1,xmm1,xmm2)
			a3(vprotd xmm1,xmm1,12)
			a3(vpaddd xmm0,xmm0,xmm1)
			a3(vpxor  xmm3,xmm3,xmm0)
			a3(vprotd xmm3,xmm3,8)
			a3(vpaddd xmm2,xmm2,xmm3)
			a3(vpxor  xmm1,xmm1,xmm2)
			a3(vpshufd xmm0,xmm0,0x39)
			a3(vprotd xmm1,xmm1,7)
			a3(vpshufd xmm3,xmm3,0x4e)  /* was legacy pshufd */
			a3(vpshufd xmm2,xmm2,0x93)  /* was legacy pshufd */
			a2(sub eax,2)
			aj(ja scrypt_chacha_xop_loop)
		/* feed-forward: X += saved input */
		a3(vpaddd xmm0,xmm0,xmm4)
		a3(vpaddd xmm1,xmm1,xmm5)
		a3(vpaddd xmm2,xmm2,xmm6)
		a3(vpaddd xmm3,xmm3,xmm7)
		/* eax = Bout + ((ebx + ecx) & ~0x7f) / 2: even blocks go to the
		   first half of Bout, odd blocks to the second half */
		a2(lea eax,[ebx+ecx])
		a2(xor ebx,edx)
		a2(and eax,~0x7f)
		a2(add ecx,64)
		a2(shr eax,1)
		a2(add eax, edi)
		a2(cmp ecx,edx)             /* flags survive the moves below until jne */
		a2(vmovdqa [eax+0],xmm0)
		a2(vmovdqa [eax+16],xmm1)
		a2(vmovdqa [eax+32],xmm2)
		a2(vmovdqa [eax+48],xmm3)
		a2(mov eax,[ebp+28])        /* reload Bxor; eax was used as scratch */
		aj(jne scrypt_ChunkMix_xop_loop)
	/* epilogue */
	a2(mov esp,ebp)
	a1(pop ebp)
	a1(pop esi)
	a1(pop edi)
	a1(pop ebx)
	aret(16)
asm_naked_fn_end(scrypt_ChunkMix_xop)

#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
/*
 * scrypt_ChunkMix_xop, x86-64 assembly.
 *
 * Mixes a chunk of 2*r 64-byte blocks with the ChaCha/8 core, using the XOP
 * vprotd instruction for the 32-bit rotates.
 *
 * Register use as seen in the body: rdi = Bout, rsi = Bin, rdx = Bxor
 * (skipped when NULL), ecx = r. All block loads/stores use vmovdqa, so the
 * buffers must be 16-byte aligned.
 *
 * Output placement matches the intrinsic version: block i is stored at index
 * (i / 2) for even i and (i / 2) + r for odd i.
 *
 * The final two shuffles of the round loop are VEX-encoded (vpshufd) like
 * every other vector instruction here, so no legacy-SSE encoding is mixed
 * into the VEX/XOP sequence. The xmm results are identical.
 */
#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_XOP

asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_xop)
	a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
	a2(shl rcx,6)               /* rcx = 2*r*64 = chunk size in bytes */
	a2(lea r9,[rcx-64])         /* r9 = byte offset of the last block */
	a2(lea rax,[rsi+r9])        /* rax = &Bin[last block] */
	a2(lea r9,[rdx+r9])         /* r9  = &Bxor[last block] (only used if Bxor != NULL) */
	a2(and rdx, rdx)            /* ZF set when Bxor is NULL; consumed by the jz below */
	a2(vmovdqa xmm0,[rax+0])
	a2(vmovdqa xmm1,[rax+16])
	a2(vmovdqa xmm2,[rax+32])
	a2(vmovdqa xmm3,[rax+48])
	aj(jz scrypt_ChunkMix_xop_no_xor1)
	a3(vpxor xmm0,xmm0,[r9+0])
	a3(vpxor xmm1,xmm1,[r9+16])
	a3(vpxor xmm2,xmm2,[r9+32])
	a3(vpxor xmm3,xmm3,[r9+48])
	a1(scrypt_ChunkMix_xop_no_xor1:)
	a2(xor r8,r8)               /* r8 toggles between 0 and rcx each block */
	a2(xor r9,r9)               /* r9 = byte offset of the current block */
	a1(scrypt_ChunkMix_xop_loop:)
		/* X ^= Bin[i] (and Bxor[i] when present) */
		a2(and rdx, rdx)
		a3(vpxor xmm0,xmm0,[rsi+r9+0])
		a3(vpxor xmm1,xmm1,[rsi+r9+16])
		a3(vpxor xmm2,xmm2,[rsi+r9+32])
		a3(vpxor xmm3,xmm3,[rsi+r9+48])
		aj(jz scrypt_ChunkMix_xop_no_xor2)
		a3(vpxor xmm0,xmm0,[rdx+r9+0])
		a3(vpxor xmm1,xmm1,[rdx+r9+16])
		a3(vpxor xmm2,xmm2,[rdx+r9+32])
		a3(vpxor xmm3,xmm3,[rdx+r9+48])
		a1(scrypt_ChunkMix_xop_no_xor2:)
		/* keep a copy of the input for the feed-forward add */
		a2(vmovdqa xmm4,xmm0)
		a2(vmovdqa xmm5,xmm1)
		a2(vmovdqa xmm6,xmm2)
		a2(vmovdqa xmm7,xmm3)
		a2(mov rax,8)               /* round counter; two rounds per iteration */
		a1(scrypt_chacha_xop_loop: )
			a3(vpaddd xmm0,xmm0,xmm1)
			a3(vpxor  xmm3,xmm3,xmm0)
			a3(vprotd xmm3,xmm3,16)
			a3(vpaddd xmm2,xmm2,xmm3)
			a3(vpxor  xmm1,xmm1,xmm2)
			a3(vprotd xmm1,xmm1,12)
			a3(vpaddd xmm0,xmm0,xmm1)
			a3(vpxor  xmm3,xmm3,xmm0)
			a3(vprotd xmm3,xmm3,8)
			a3(vpaddd xmm2,xmm2,xmm3)
			a3(vpshufd xmm0,xmm0,0x93)
			a3(vpxor  xmm1,xmm1,xmm2)
			a3(vprotd xmm1,xmm1,7)
			a3(vpshufd xmm3,xmm3,0x4e)
			a3(vpaddd xmm0,xmm0,xmm1)
			a3(vpshufd xmm2,xmm2,0x39)
			a3(vpxor  xmm3,xmm3,xmm0)
			a3(vprotd xmm3,xmm3,16)
			a3(vpaddd xmm2,xmm2,xmm3)
			a3(vpxor  xmm1,xmm1,xmm2)
			a3(vprotd xmm1,xmm1,12)
			a3(vpaddd xmm0,xmm0,xmm1)
			a3(vpxor  xmm3,xmm3,xmm0)
			a3(vprotd xmm3,xmm3,8)
			a3(vpaddd xmm2,xmm2,xmm3)
			a3(vpxor  xmm1,xmm1,xmm2)
			a3(vpshufd xmm0,xmm0,0x39)
			a3(vprotd xmm1,xmm1,7)
			a3(vpshufd xmm3,xmm3,0x4e)  /* was legacy pshufd */
			a3(vpshufd xmm2,xmm2,0x93)  /* was legacy pshufd */
			a2(sub rax,2)
			aj(ja scrypt_chacha_xop_loop)
		/* feed-forward: X += saved input */
		a3(vpaddd xmm0,xmm0,xmm4)
		a3(vpaddd xmm1,xmm1,xmm5)
		a3(vpaddd xmm2,xmm2,xmm6)
		a3(vpaddd xmm3,xmm3,xmm7)
		/* rax = Bout + ((r8 + r9) & ~0x7f) / 2: even blocks go to the
		   first half of Bout, odd blocks to the second half */
		a2(lea rax,[r8+r9])
		a2(xor r8,rcx)
		a2(and rax,~0x7f)
		a2(add r9,64)
		a2(shr rax,1)
		a2(add rax, rdi)
		a2(cmp r9,rcx)              /* flags survive the stores below until jne */
		a2(vmovdqa [rax+0],xmm0)
		a2(vmovdqa [rax+16],xmm1)
		a2(vmovdqa [rax+32],xmm2)
		a2(vmovdqa [rax+48],xmm3)
		aj(jne scrypt_ChunkMix_xop_loop)
	a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_xop)

#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
/*
 * scrypt_ChunkMix_xop, compiler-intrinsic version.
 *
 * Mixes a chunk of 2*r 64-byte blocks with the ChaCha/8 core, using the XOP
 * rotate intrinsic _mm_roti_epi32.
 *
 *   Bout - destination chunk
 *   Bin  - source chunk
 *   Bxor - optional second source XORed into every block that is read;
 *          ignored when NULL
 *   r    - the chunk holds 2*r blocks
 *
 * Block i of the result is written to index (i / 2) + half, where half
 * alternates between 0 and r, so even-numbered results fill the first half
 * of Bout and odd-numbered results the second half.
 *
 * The unused local x6 (a temporary only the shift/or rotate variants need)
 * has been removed.
 */
#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))

#define SCRYPT_CHACHA_XOP

static void asm_calling_convention NOINLINE
scrypt_ChunkMix_xop(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	uint32_t i, blocksPerChunk = r * 2, half = 0;
	xmmi *xmmp,x0,x1,x2,x3,t0,t1,t2,t3;
	size_t rounds;

	/* 1: X = B_{2r - 1} */
	xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
	x0 = xmmp[0];
	x1 = xmmp[1];
	x2 = xmmp[2];
	x3 = xmmp[3];

	if (Bxor) {
		xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
		x0 = _mm_xor_si128(x0, xmmp[0]);
		x1 = _mm_xor_si128(x1, xmmp[1]);
		x2 = _mm_xor_si128(x2, xmmp[2]);
		x3 = _mm_xor_si128(x3, xmmp[3]);
	}

	/* 2: for i = 0 to 2r - 1 do */
	for (i = 0; i < blocksPerChunk; i++, half ^= r) {
		/* 3: X = H(X ^ B_i) */
		xmmp = (xmmi *)scrypt_block(Bin, i);
		x0 = _mm_xor_si128(x0, xmmp[0]);
		x1 = _mm_xor_si128(x1, xmmp[1]);
		x2 = _mm_xor_si128(x2, xmmp[2]);
		x3 = _mm_xor_si128(x3, xmmp[3]);

		if (Bxor) {
			xmmp = (xmmi *)scrypt_block(Bxor, i);
			x0 = _mm_xor_si128(x0, xmmp[0]);
			x1 = _mm_xor_si128(x1, xmmp[1]);
			x2 = _mm_xor_si128(x2, xmmp[2]);
			x3 = _mm_xor_si128(x3, xmmp[3]);
		}

		/* remember the core input for the feed-forward add below */
		t0 = x0;
		t1 = x1;
		t2 = x2;
		t3 = x3;

		/* 8 rounds, two per iteration; the shuffles re-align the rows
		   between the first and second round of each pair */
		for (rounds = 8; rounds; rounds -= 2) {
			x0 = _mm_add_epi32(x0, x1);
			x3 = _mm_xor_si128(x3, x0);
			x3 = _mm_roti_epi32(x3, 16);
			x2 = _mm_add_epi32(x2, x3);
			x1 = _mm_xor_si128(x1, x2);
			x1 = _mm_roti_epi32(x1, 12);
			x0 = _mm_add_epi32(x0, x1);
			x3 = _mm_xor_si128(x3, x0);
			x3 = _mm_roti_epi32(x3, 8);
			x2 = _mm_add_epi32(x2, x3);
			x0 = _mm_shuffle_epi32(x0, 0x93);
			x1 = _mm_xor_si128(x1, x2);
			x1 = _mm_roti_epi32(x1, 7);
			x3 = _mm_shuffle_epi32(x3, 0x4e);
			x0 = _mm_add_epi32(x0, x1);
			x2 = _mm_shuffle_epi32(x2, 0x39);
			x3 = _mm_xor_si128(x3, x0);
			x3 = _mm_roti_epi32(x3, 16);
			x2 = _mm_add_epi32(x2, x3);
			x1 = _mm_xor_si128(x1, x2);
			x1 = _mm_roti_epi32(x1, 12);
			x0 = _mm_add_epi32(x0, x1);
			x3 = _mm_xor_si128(x3, x0);
			x3 = _mm_roti_epi32(x3, 8);
			x2 = _mm_add_epi32(x2, x3);
			x1 = _mm_xor_si128(x1, x2);
			x0 = _mm_shuffle_epi32(x0, 0x39);
			x1 = _mm_roti_epi32(x1, 7);
			x3 = _mm_shuffle_epi32(x3, 0x4e);
			x2 = _mm_shuffle_epi32(x2, 0x93);
		}

		/* feed-forward: add the core input back in */
		x0 = _mm_add_epi32(x0, t0);
		x1 = _mm_add_epi32(x1, t1);
		x2 = _mm_add_epi32(x2, t2);
		x3 = _mm_add_epi32(x3, t3);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
		xmmp[0] = x0;
		xmmp[1] = x1;
		xmmp[2] = x2;
		xmmp[3] = x3;
	}
}

#endif
|
||||
|
||||
/*
 * If one of the XOP ChunkMix variants above was compiled in (it defines
 * SCRYPT_CHACHA_XOP), make it the advertised mix: SCRYPT_MIX becomes the
 * string "ChaCha/8-XOP" and SCRYPT_CHACHA_INCLUDED is (re)defined. The
 * #if guards of the other ChaCha variants test SCRYPT_CHACHA_INCLUDED
 * together with SCRYPT_CHOOSE_COMPILETIME.
 */
#if defined(SCRYPT_CHACHA_XOP)
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "ChaCha/8-XOP"
|
||||
#undef SCRYPT_CHACHA_INCLUDED
|
||||
#define SCRYPT_CHACHA_INCLUDED
|
||||
#endif
|
||||
69
vendor/scrypt-jane/code/scrypt-jane-mix_chacha.h
vendored
69
vendor/scrypt-jane/code/scrypt-jane-mix_chacha.h
vendored
|
|
@ -1,69 +0,0 @@
|
|||
/*
 * Portable reference ChaCha core. Compiled unless SCRYPT_CHOOSE_COMPILETIME
 * is set and another ChaCha implementation has already defined
 * SCRYPT_CHACHA_INCLUDED.
 */
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)

#undef SCRYPT_MIX
#define SCRYPT_MIX "ChaCha20/8 Ref"

#undef SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_BASIC

/*
 * Runs 8 ChaCha rounds (4 column/diagonal double rounds) over the 16-word
 * state and adds the result back into state in place.
 */
static void
chacha_core_basic(uint32_t state[16]) {
	uint32_t w[16], t;
	size_t idx, remaining;

	/* work on a private copy so the original words survive for the final add */
	for (idx = 0; idx < 16; idx++)
		w[idx] = state[idx];

	/* one quarter round on the working words with indices a, b, c, d */
	#define quarter(a,b,c,d) \
		w[a] += w[b]; t = w[d] ^ w[a]; w[d] = ROTL32(t,16); \
		w[c] += w[d]; t = w[b] ^ w[c]; w[b] = ROTL32(t,12); \
		w[a] += w[b]; t = w[d] ^ w[a]; w[d] = ROTL32(t, 8); \
		w[c] += w[d]; t = w[b] ^ w[c]; w[b] = ROTL32(t, 7);

	for (remaining = 8; remaining != 0; remaining -= 2) {
		/* column round */
		quarter( 0, 4, 8,12)
		quarter( 1, 5, 9,13)
		quarter( 2, 6,10,14)
		quarter( 3, 7,11,15)
		/* diagonal round */
		quarter( 0, 5,10,15)
		quarter( 1, 6,11,12)
		quarter( 2, 7, 8,13)
		quarter( 3, 4, 9,14)
	}

	/* feed-forward: state += permuted copy */
	for (idx = 0; idx < 16; idx++)
		state[idx] += w[idx];

	#undef quarter
}

#endif
|
||||
381
vendor/scrypt-jane/code/scrypt-jane-mix_salsa-avx.h
vendored
381
vendor/scrypt-jane/code/scrypt-jane-mix_salsa-avx.h
vendored
|
|
@ -1,381 +0,0 @@
|
|||
/* x86 */
|
||||
/*
 * scrypt_ChunkMix_avx, 32-bit x86 assembly.
 *
 * Mixes a chunk of 2*r 64-byte blocks with the Salsa/8 core using
 * VEX-encoded (AVX) 128-bit integer instructions.
 *
 *   Bout - destination chunk
 *   Bin  - source chunk
 *   Bxor - optional second source XORed into every block that is read;
 *          skipped when NULL
 *   r    - the chunk holds 2*r blocks of 64 bytes
 *
 * The four arguments are read from the stack ([ebp+20]..[ebp+32] after the
 * four pushes) and the routine leaves through aret(16), presumably a
 * callee-cleanup return -- confirm against the asm_naked_fn/aret macros.
 * Aligned moves are used throughout, so the buffers must be 16-byte aligned.
 * The first two rows of the core input are spilled to the 64-byte-aligned
 * stack scratch area; the other two are kept in xmm6/xmm7.
 *
 * The initial four block loads use vmovdqa, matching the rest of this
 * routine and the x64 variant; previously they were legacy-SSE movdqa mixed
 * into an otherwise VEX-only sequence. The loaded values are identical.
 */
#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_SALSA_AVX

asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_avx)
	/* prologue: save callee-saved registers, fetch arguments */
	a1(push ebx)
	a1(push edi)
	a1(push esi)
	a1(push ebp)
	a2(mov ebp,esp)
	a2(mov edi,[ebp+20])        /* edi = Bout */
	a2(mov esi,[ebp+24])        /* esi = Bin */
	a2(mov eax,[ebp+28])        /* eax = Bxor */
	a2(mov ebx,[ebp+32])        /* ebx = r */
	a2(sub esp,32)              /* scratch for two saved rows ... */
	a2(and esp,~63)             /* ... aligned for vmovdqa */
	a2(lea edx,[ebx*2])
	a2(shl edx,6)               /* edx = 2*r*64 = chunk size in bytes */
	a2(lea ecx,[edx-64])        /* ecx = byte offset of the last block */
	a2(and eax, eax)            /* ZF set when Bxor is NULL; consumed by the jz below */
	/* X = last block of Bin (xor last block of Bxor when present) */
	a2(vmovdqa xmm0,[ecx+esi+0])    /* was legacy movdqa */
	a2(vmovdqa xmm1,[ecx+esi+16])   /* was legacy movdqa */
	a2(vmovdqa xmm2,[ecx+esi+32])   /* was legacy movdqa */
	a2(vmovdqa xmm3,[ecx+esi+48])   /* was legacy movdqa */
	aj(jz scrypt_ChunkMix_avx_no_xor1)
	a3(vpxor xmm0,xmm0,[ecx+eax+0])
	a3(vpxor xmm1,xmm1,[ecx+eax+16])
	a3(vpxor xmm2,xmm2,[ecx+eax+32])
	a3(vpxor xmm3,xmm3,[ecx+eax+48])
	a1(scrypt_ChunkMix_avx_no_xor1:)
	a2(xor ecx,ecx)             /* ecx = byte offset of the current block */
	a2(xor ebx,ebx)             /* ebx toggles between 0 and edx each block */
	a1(scrypt_ChunkMix_avx_loop:)
		/* X ^= Bin[i] (and Bxor[i] when present) */
		a2(and eax, eax)
		a3(vpxor xmm0,xmm0,[esi+ecx+0])
		a3(vpxor xmm1,xmm1,[esi+ecx+16])
		a3(vpxor xmm2,xmm2,[esi+ecx+32])
		a3(vpxor xmm3,xmm3,[esi+ecx+48])
		aj(jz scrypt_ChunkMix_avx_no_xor2)
		a3(vpxor xmm0,xmm0,[eax+ecx+0])
		a3(vpxor xmm1,xmm1,[eax+ecx+16])
		a3(vpxor xmm2,xmm2,[eax+ecx+32])
		a3(vpxor xmm3,xmm3,[eax+ecx+48])
		a1(scrypt_ChunkMix_avx_no_xor2:)
		/* keep a copy of the input for the feed-forward add */
		a2(vmovdqa [esp+0],xmm0)
		a2(vmovdqa [esp+16],xmm1)
		a2(vmovdqa xmm6,xmm2)
		a2(vmovdqa xmm7,xmm3)
		a2(mov eax,8)               /* round counter; two rounds per iteration */
		a1(scrypt_salsa_avx_loop: )
			a3(vpaddd xmm4, xmm1, xmm0)
			a3(vpsrld xmm5, xmm4, 25)
			a3(vpslld xmm4, xmm4, 7)
			a3(vpxor xmm3, xmm3, xmm5)
			a3(vpxor xmm3, xmm3, xmm4)
			a3(vpaddd xmm4, xmm0, xmm3)
			a3(vpsrld xmm5, xmm4, 23)
			a3(vpslld xmm4, xmm4, 9)
			a3(vpxor xmm2, xmm2, xmm5)
			a3(vpxor xmm2, xmm2, xmm4)
			a3(vpaddd xmm4, xmm3, xmm2)
			a3(vpsrld xmm5, xmm4, 19)
			a3(vpslld xmm4, xmm4, 13)
			a3(vpxor xmm1, xmm1, xmm5)
			a3(vpshufd xmm3, xmm3, 0x93)
			a3(vpxor xmm1, xmm1, xmm4)
			a3(vpaddd xmm4, xmm2, xmm1)
			a3(vpsrld xmm5, xmm4, 14)
			a3(vpslld xmm4, xmm4, 18)
			a3(vpxor xmm0, xmm0, xmm5)
			a3(vpshufd xmm2, xmm2, 0x4e)
			a3(vpxor xmm0, xmm0, xmm4)
			a3(vpaddd xmm4, xmm3, xmm0)
			a3(vpshufd xmm1, xmm1, 0x39)
			a3(vpsrld xmm5, xmm4, 25)
			a3(vpslld xmm4, xmm4, 7)
			a3(vpxor xmm1, xmm1, xmm5)
			a3(vpxor xmm1, xmm1, xmm4)
			a3(vpaddd xmm4, xmm0, xmm1)
			a3(vpsrld xmm5, xmm4, 23)
			a3(vpslld xmm4, xmm4, 9)
			a3(vpxor xmm2, xmm2, xmm5)
			a3(vpxor xmm2, xmm2, xmm4)
			a3(vpaddd xmm4, xmm1, xmm2)
			a3(vpsrld xmm5, xmm4, 19)
			a3(vpslld xmm4, xmm4, 13)
			a3(vpxor xmm3, xmm3, xmm5)
			a3(vpshufd xmm1, xmm1, 0x93)
			a3(vpxor xmm3, xmm3, xmm4)
			a3(vpaddd xmm4, xmm2, xmm3)
			a3(vpsrld xmm5, xmm4, 14)
			a3(vpslld xmm4, xmm4, 18)
			a3(vpxor xmm0, xmm0, xmm5)
			a3(vpshufd xmm2, xmm2, 0x4e)
			a3(vpxor xmm0, xmm0, xmm4)
			a3(vpshufd xmm3, xmm3, 0x39)
			a2(sub eax, 2)
			aj(ja scrypt_salsa_avx_loop)
		/* feed-forward: X += saved input */
		a3(vpaddd xmm0,xmm0,[esp+0])
		a3(vpaddd xmm1,xmm1,[esp+16])
		a3(vpaddd xmm2,xmm2,xmm6)
		a3(vpaddd xmm3,xmm3,xmm7)
		/* eax = Bout + ((ebx + ecx) & ~0x7f) / 2: even blocks go to the
		   first half of Bout, odd blocks to the second half */
		a2(lea eax,[ebx+ecx])
		a2(xor ebx,edx)
		a2(and eax,~0x7f)
		a2(add ecx,64)
		a2(shr eax,1)
		a2(add eax, edi)
		a2(cmp ecx,edx)             /* flags survive the moves below until jne */
		a2(vmovdqa [eax+0],xmm0)
		a2(vmovdqa [eax+16],xmm1)
		a2(vmovdqa [eax+32],xmm2)
		a2(vmovdqa [eax+48],xmm3)
		a2(mov eax,[ebp+28])        /* reload Bxor; eax was used as scratch */
		aj(jne scrypt_ChunkMix_avx_loop)
	/* epilogue */
	a2(mov esp,ebp)
	a1(pop ebp)
	a1(pop esi)
	a1(pop edi)
	a1(pop ebx)
	aret(16)
asm_naked_fn_end(scrypt_ChunkMix_avx)

#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
/*
 * scrypt_ChunkMix_avx, x86-64 assembly: mixes a chunk of 2*r 64-byte blocks
 * with the Salsa/8 core using VEX-encoded 128-bit integer instructions.
 * Register use as seen in the body: rdi = Bout, rsi = Bin, rdx = Bxor
 * (skipped when NULL), ecx = r. Aligned moves (vmovdqa) are used for every
 * block access, so the buffers must be 16-byte aligned.
 */
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_AVX
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx)
|
||||
/* rcx = 2*r*64 = chunk size in bytes; r9 = byte offset of the last block */
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* ZF is set when Bxor is NULL; the loads below leave it intact for the jz */
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_avx_no_xor1:)
|
||||
/* r9 = byte offset of the current block; r8 toggles between 0 and rcx */
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_avx_loop:)
|
||||
/* X ^= Bin[i], and ^= Bxor[i] when Bxor is non-NULL */
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_avx_no_xor2:)
|
||||
/* keep a copy of the core input in xmm8..xmm11 for the feed-forward add */
a2(vmovdqa xmm8,xmm0)
|
||||
a2(vmovdqa xmm9,xmm1)
|
||||
a2(vmovdqa xmm10,xmm2)
|
||||
a2(vmovdqa xmm11,xmm3)
|
||||
/* round counter: 8 rounds, two per loop iteration */
a2(mov rax,8)
|
||||
a1(scrypt_salsa_avx_loop: )
|
||||
/* each step: xmm4 = a + b, then dst ^= (xmm4 << n) ^ (xmm4 >> (32 - n)) */
a3(vpaddd xmm4, xmm1, xmm0)
|
||||
a3(vpsrld xmm5, xmm4, 25)
|
||||
a3(vpslld xmm4, xmm4, 7)
|
||||
a3(vpxor xmm3, xmm3, xmm5)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm3)
|
||||
a3(vpsrld xmm5, xmm4, 23)
|
||||
a3(vpslld xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm5)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm2)
|
||||
a3(vpsrld xmm5, xmm4, 19)
|
||||
a3(vpslld xmm4, xmm4, 13)
|
||||
a3(vpxor xmm1, xmm1, xmm5)
|
||||
a3(vpshufd xmm3, xmm3, 0x93)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm1)
|
||||
a3(vpsrld xmm5, xmm4, 14)
|
||||
a3(vpslld xmm4, xmm4, 18)
|
||||
a3(vpxor xmm0, xmm0, xmm5)
|
||||
a3(vpshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm0)
|
||||
a3(vpshufd xmm1, xmm1, 0x39)
|
||||
a3(vpsrld xmm5, xmm4, 25)
|
||||
a3(vpslld xmm4, xmm4, 7)
|
||||
a3(vpxor xmm1, xmm1, xmm5)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm1)
|
||||
a3(vpsrld xmm5, xmm4, 23)
|
||||
a3(vpslld xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm5)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm1, xmm2)
|
||||
a3(vpsrld xmm5, xmm4, 19)
|
||||
a3(vpslld xmm4, xmm4, 13)
|
||||
a3(vpxor xmm3, xmm3, xmm5)
|
||||
a3(vpshufd xmm1, xmm1, 0x93)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm3)
|
||||
a3(vpsrld xmm5, xmm4, 14)
|
||||
a3(vpslld xmm4, xmm4, 18)
|
||||
a3(vpxor xmm0, xmm0, xmm5)
|
||||
a3(vpshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(vpshufd xmm3, xmm3, 0x39)
|
||||
a2(sub rax, 2)
|
||||
aj(ja scrypt_salsa_avx_loop)
|
||||
/* feed-forward: X += saved core input */
a3(vpaddd xmm0,xmm0,xmm8)
|
||||
a3(vpaddd xmm1,xmm1,xmm9)
|
||||
a3(vpaddd xmm2,xmm2,xmm10)
|
||||
a3(vpaddd xmm3,xmm3,xmm11)
|
||||
/* rax = Bout + ((r8 + r9) & ~0x7f) / 2: even blocks land in the first half of Bout, odd blocks in the second */
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
/* end-of-chunk test; the stores below do not modify flags before the jne */
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_avx_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
/*
 * scrypt_ChunkMix_avx, compiler-intrinsic version.
 *
 * Mixes a chunk of 2*r 64-byte blocks with the Salsa/8 core.
 *
 *   Bout - destination chunk
 *   Bin  - source chunk
 *   Bxor - optional second source XORed into every block that is read;
 *          ignored when NULL
 *   r    - the chunk holds 2*r blocks
 *
 * Result block blk is stored at index (blk / 2) + oddOffset, where oddOffset
 * alternates between 0 and r, so even results fill the first half of Bout
 * and odd results the second half.
 */
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))

#define SCRYPT_SALSA_AVX

static void asm_calling_convention NOINLINE
scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	uint32_t blk, blockCount = r * 2, oddOffset = 0;
	xmmi *vec, row0, row1, row2, row3, sum, keep0, keep1, keep2, keep3;
	size_t pass;

	/* v ^= rotl32(a + b, n), written as two shifts and two xors */
	#define salsa_avx_step(v, a, b, n) \
		sum = _mm_add_epi32(a, b); \
		v = _mm_xor_si128(v, _mm_slli_epi32(sum, n)); \
		v = _mm_xor_si128(v, _mm_srli_epi32(sum, 32 - (n)));

	/* 1: X = B_{2r - 1} */
	vec = (xmmi *)scrypt_block(Bin, blockCount - 1);
	row0 = vec[0];
	row1 = vec[1];
	row2 = vec[2];
	row3 = vec[3];

	if (Bxor) {
		vec = (xmmi *)scrypt_block(Bxor, blockCount - 1);
		row0 = _mm_xor_si128(row0, vec[0]);
		row1 = _mm_xor_si128(row1, vec[1]);
		row2 = _mm_xor_si128(row2, vec[2]);
		row3 = _mm_xor_si128(row3, vec[3]);
	}

	/* 2: for i = 0 to 2r - 1 do */
	for (blk = 0; blk < blockCount; blk++, oddOffset ^= r) {
		/* 3: X = H(X ^ B_i) */
		vec = (xmmi *)scrypt_block(Bin, blk);
		row0 = _mm_xor_si128(row0, vec[0]);
		row1 = _mm_xor_si128(row1, vec[1]);
		row2 = _mm_xor_si128(row2, vec[2]);
		row3 = _mm_xor_si128(row3, vec[3]);

		if (Bxor) {
			vec = (xmmi *)scrypt_block(Bxor, blk);
			row0 = _mm_xor_si128(row0, vec[0]);
			row1 = _mm_xor_si128(row1, vec[1]);
			row2 = _mm_xor_si128(row2, vec[2]);
			row3 = _mm_xor_si128(row3, vec[3]);
		}

		/* remember the core input for the feed-forward add below */
		keep0 = row0;
		keep1 = row1;
		keep2 = row2;
		keep3 = row3;

		/* 8 rounds, two per pass; each shuffle is applied only after the
		   last step that needs the row in its previous lane order */
		for (pass = 8; pass != 0; pass -= 2) {
			/* first round of the pair */
			salsa_avx_step(row3, row1, row0, 7)
			salsa_avx_step(row2, row0, row3, 9)
			salsa_avx_step(row1, row3, row2, 13)
			row3 = _mm_shuffle_epi32(row3, 0x93);
			salsa_avx_step(row0, row2, row1, 18)
			row2 = _mm_shuffle_epi32(row2, 0x4e);
			row1 = _mm_shuffle_epi32(row1, 0x39);

			/* second round of the pair */
			salsa_avx_step(row1, row3, row0, 7)
			salsa_avx_step(row2, row0, row1, 9)
			salsa_avx_step(row3, row1, row2, 13)
			row1 = _mm_shuffle_epi32(row1, 0x93);
			salsa_avx_step(row0, row2, row3, 18)
			row2 = _mm_shuffle_epi32(row2, 0x4e);
			row3 = _mm_shuffle_epi32(row3, 0x39);
		}

		/* feed-forward: add the core input back in */
		row0 = _mm_add_epi32(row0, keep0);
		row1 = _mm_add_epi32(row1, keep1);
		row2 = _mm_add_epi32(row2, keep2);
		row3 = _mm_add_epi32(row3, keep3);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		vec = (xmmi *)scrypt_block(Bout, (blk / 2) + oddOffset);
		vec[0] = row0;
		vec[1] = row1;
		vec[2] = row2;
		vec[3] = row3;
	}

	#undef salsa_avx_step
}

#endif
|
||||
|
||||
/*
 * If one of the AVX ChunkMix variants above was compiled in (it defines
 * SCRYPT_SALSA_AVX), make it the advertised mix: SCRYPT_MIX becomes the
 * string "Salsa/8-AVX" and SCRYPT_SALSA_INCLUDED is (re)defined. The #if
 * guards of the other Salsa variants test SCRYPT_SALSA_INCLUDED together
 * with SCRYPT_CHOOSE_COMPILETIME.
 */
#if defined(SCRYPT_SALSA_AVX)
|
||||
/* uses salsa_core_tangle_sse2 */
|
||||
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa/8-AVX"
|
||||
#undef SCRYPT_SALSA_INCLUDED
|
||||
#define SCRYPT_SALSA_INCLUDED
|
||||
#endif
|
||||
443
vendor/scrypt-jane/code/scrypt-jane-mix_salsa-sse2.h
vendored
443
vendor/scrypt-jane/code/scrypt-jane-mix_salsa-sse2.h
vendored
|
|
@ -1,443 +0,0 @@
|
|||
/* x86 */
|
||||
/*
 * scrypt_ChunkMix_sse2, 32-bit x86 assembly: mixes a chunk of 2*r 64-byte
 * blocks with the Salsa/8 core using SSE2 integer instructions. The four
 * arguments (Bout, Bin, Bxor, r) are read from the stack; Bxor is skipped
 * when NULL. Aligned moves (movdqa) and memory-operand pxor/paddd are used,
 * so the buffers must be 16-byte aligned.
 */
#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_SSE2
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_sse2)
|
||||
/* prologue: save registers, then edi = Bout, esi = Bin, eax = Bxor, ebx = r */
a1(push ebx)
|
||||
a1(push edi)
|
||||
a1(push esi)
|
||||
a1(push ebp)
|
||||
a2(mov ebp,esp)
|
||||
a2(mov edi,[ebp+20])
|
||||
a2(mov esi,[ebp+24])
|
||||
a2(mov eax,[ebp+28])
|
||||
a2(mov ebx,[ebp+32])
|
||||
/* 64-byte-aligned stack scratch; [esp+0] and [esp+16] hold two saved rows */
a2(sub esp,32)
|
||||
a2(and esp,~63)
|
||||
/* edx = 2*r*64 = chunk size in bytes; ecx = byte offset of the last block */
a2(lea edx,[ebx*2])
|
||||
a2(shl edx,6)
|
||||
a2(lea ecx,[edx-64])
|
||||
/* ZF is set when Bxor is NULL; the loads below leave it intact for the jz */
a2(and eax, eax)
|
||||
a2(movdqa xmm0,[ecx+esi+0])
|
||||
a2(movdqa xmm1,[ecx+esi+16])
|
||||
a2(movdqa xmm2,[ecx+esi+32])
|
||||
a2(movdqa xmm3,[ecx+esi+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor1)
|
||||
a2(pxor xmm0,[ecx+eax+0])
|
||||
a2(pxor xmm1,[ecx+eax+16])
|
||||
a2(pxor xmm2,[ecx+eax+32])
|
||||
a2(pxor xmm3,[ecx+eax+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor1:)
|
||||
/* ecx = byte offset of the current block; ebx toggles between 0 and edx */
a2(xor ecx,ecx)
|
||||
a2(xor ebx,ebx)
|
||||
a1(scrypt_ChunkMix_sse2_loop:)
|
||||
/* X ^= Bin[i], and ^= Bxor[i] when Bxor is non-NULL */
a2(and eax, eax)
|
||||
a2(pxor xmm0,[esi+ecx+0])
|
||||
a2(pxor xmm1,[esi+ecx+16])
|
||||
a2(pxor xmm2,[esi+ecx+32])
|
||||
a2(pxor xmm3,[esi+ecx+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor2)
|
||||
a2(pxor xmm0,[eax+ecx+0])
|
||||
a2(pxor xmm1,[eax+ecx+16])
|
||||
a2(pxor xmm2,[eax+ecx+32])
|
||||
a2(pxor xmm3,[eax+ecx+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor2:)
|
||||
/* keep a copy of the core input (two rows on the stack, two in xmm6/xmm7) */
a2(movdqa [esp+0],xmm0)
|
||||
a2(movdqa [esp+16],xmm1)
|
||||
a2(movdqa xmm6,xmm2)
|
||||
a2(movdqa xmm7,xmm3)
|
||||
/* round counter: 8 rounds, two per loop iteration */
a2(mov eax,8)
|
||||
a1(scrypt_salsa_sse2_loop: )
|
||||
/* each step: xmm4 = a + b, then dst ^= (xmm4 << n) ^ (xmm4 >> (32 - n)) */
a2(movdqa xmm4, xmm1)
|
||||
a2(paddd xmm4, xmm0)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 7)
|
||||
a2(psrld xmm5, 25)
|
||||
a2(pxor xmm3, xmm4)
|
||||
a2(movdqa xmm4, xmm0)
|
||||
a2(pxor xmm3, xmm5)
|
||||
a2(paddd xmm4, xmm3)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 9)
|
||||
a2(psrld xmm5, 23)
|
||||
a2(pxor xmm2, xmm4)
|
||||
a2(movdqa xmm4, xmm3)
|
||||
a2(pxor xmm2, xmm5)
|
||||
a3(pshufd xmm3, xmm3, 0x93)
|
||||
a2(paddd xmm4, xmm2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 13)
|
||||
a2(psrld xmm5, 19)
|
||||
a2(pxor xmm1, xmm4)
|
||||
a2(movdqa xmm4, xmm2)
|
||||
a2(pxor xmm1, xmm5)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a2(paddd xmm4, xmm1)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 18)
|
||||
a2(psrld xmm5, 14)
|
||||
a2(pxor xmm0, xmm4)
|
||||
a2(movdqa xmm4, xmm3)
|
||||
a2(pxor xmm0, xmm5)
|
||||
a3(pshufd xmm1, xmm1, 0x39)
|
||||
a2(paddd xmm4, xmm0)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 7)
|
||||
a2(psrld xmm5, 25)
|
||||
a2(pxor xmm1, xmm4)
|
||||
a2(movdqa xmm4, xmm0)
|
||||
a2(pxor xmm1, xmm5)
|
||||
a2(paddd xmm4, xmm1)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 9)
|
||||
a2(psrld xmm5, 23)
|
||||
a2(pxor xmm2, xmm4)
|
||||
a2(movdqa xmm4, xmm1)
|
||||
a2(pxor xmm2, xmm5)
|
||||
a3(pshufd xmm1, xmm1, 0x93)
|
||||
a2(paddd xmm4, xmm2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 13)
|
||||
a2(psrld xmm5, 19)
|
||||
a2(pxor xmm3, xmm4)
|
||||
a2(movdqa xmm4, xmm2)
|
||||
a2(pxor xmm3, xmm5)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a2(paddd xmm4, xmm3)
|
||||
/* counter update is issued early; the SSE instructions that follow do not touch the flags the ja reads */
a2(sub eax, 2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 18)
|
||||
a2(psrld xmm5, 14)
|
||||
a2(pxor xmm0, xmm4)
|
||||
a3(pshufd xmm3, xmm3, 0x39)
|
||||
a2(pxor xmm0, xmm5)
|
||||
aj(ja scrypt_salsa_sse2_loop)
|
||||
/* feed-forward: X += saved core input */
a2(paddd xmm0,[esp+0])
|
||||
a2(paddd xmm1,[esp+16])
|
||||
a2(paddd xmm2,xmm6)
|
||||
a2(paddd xmm3,xmm7)
|
||||
/* eax = Bout + ((ebx + ecx) & ~0x7f) / 2: even blocks land in the first half of Bout, odd blocks in the second */
a2(lea eax,[ebx+ecx])
|
||||
a2(xor ebx,edx)
|
||||
a2(and eax,~0x7f)
|
||||
a2(add ecx,64)
|
||||
a2(shr eax,1)
|
||||
a2(add eax, edi)
|
||||
/* end-of-chunk test; the moves below do not modify flags before the jne */
a2(cmp ecx,edx)
|
||||
a2(movdqa [eax+0],xmm0)
|
||||
a2(movdqa [eax+16],xmm1)
|
||||
a2(movdqa [eax+32],xmm2)
|
||||
a2(movdqa [eax+48],xmm3)
|
||||
/* reload Bxor: eax was used as the round counter and store pointer */
a2(mov eax,[ebp+28])
|
||||
aj(jne scrypt_ChunkMix_sse2_loop)
|
||||
/* epilogue: restore stack and saved registers. NOTE(review): aret(16) presumably pops the 16 bytes of arguments -- confirm against the aret macro */
a2(mov esp,ebp)
|
||||
a1(pop ebp)
|
||||
a1(pop esi)
|
||||
a1(pop edi)
|
||||
a1(pop ebx)
|
||||
aret(16)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_sse2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
/*
 * scrypt_ChunkMix_sse2, x86-64 assembly: mixes a chunk of 2*r 64-byte blocks
 * with the Salsa/8 core using SSE2 integer instructions. Register use as
 * seen in the body: rdi = Bout, rsi = Bin, rdx = Bxor (skipped when NULL),
 * ecx = r. Aligned moves (movdqa) and memory-operand pxor are used, so the
 * buffers must be 16-byte aligned.
 */
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_SSE2
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_sse2)
|
||||
/* rcx = 2*r*64 = chunk size in bytes; r9 = byte offset of the last block */
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* ZF is set when Bxor is NULL; the loads below leave it intact for the jz */
a2(and rdx, rdx)
|
||||
a2(movdqa xmm0,[rax+0])
|
||||
a2(movdqa xmm1,[rax+16])
|
||||
a2(movdqa xmm2,[rax+32])
|
||||
a2(movdqa xmm3,[rax+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor1)
|
||||
a2(pxor xmm0,[r9+0])
|
||||
a2(pxor xmm1,[r9+16])
|
||||
a2(pxor xmm2,[r9+32])
|
||||
a2(pxor xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor1:)
|
||||
/* r9 = byte offset of the current block; r8 toggles between 0 and rcx */
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_sse2_loop:)
|
||||
/* X ^= Bin[i], and ^= Bxor[i] when Bxor is non-NULL */
a2(and rdx, rdx)
|
||||
a2(pxor xmm0,[rsi+r9+0])
|
||||
a2(pxor xmm1,[rsi+r9+16])
|
||||
a2(pxor xmm2,[rsi+r9+32])
|
||||
a2(pxor xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor2)
|
||||
a2(pxor xmm0,[rdx+r9+0])
|
||||
a2(pxor xmm1,[rdx+r9+16])
|
||||
a2(pxor xmm2,[rdx+r9+32])
|
||||
a2(pxor xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor2:)
|
||||
/* keep a copy of the core input in xmm8..xmm11 for the feed-forward add */
a2(movdqa xmm8,xmm0)
|
||||
a2(movdqa xmm9,xmm1)
|
||||
a2(movdqa xmm10,xmm2)
|
||||
a2(movdqa xmm11,xmm3)
|
||||
/* round counter: 8 rounds, two per loop iteration */
a2(mov rax,8)
|
||||
a1(scrypt_salsa_sse2_loop: )
|
||||
/* each step: xmm4 = a + b, then dst ^= (xmm4 << n) ^ (xmm4 >> (32 - n)) */
a2(movdqa xmm4, xmm1)
|
||||
a2(paddd xmm4, xmm0)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 7)
|
||||
a2(psrld xmm5, 25)
|
||||
a2(pxor xmm3, xmm4)
|
||||
a2(movdqa xmm4, xmm0)
|
||||
a2(pxor xmm3, xmm5)
|
||||
a2(paddd xmm4, xmm3)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 9)
|
||||
a2(psrld xmm5, 23)
|
||||
a2(pxor xmm2, xmm4)
|
||||
a2(movdqa xmm4, xmm3)
|
||||
a2(pxor xmm2, xmm5)
|
||||
a3(pshufd xmm3, xmm3, 0x93)
|
||||
a2(paddd xmm4, xmm2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 13)
|
||||
a2(psrld xmm5, 19)
|
||||
a2(pxor xmm1, xmm4)
|
||||
a2(movdqa xmm4, xmm2)
|
||||
a2(pxor xmm1, xmm5)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a2(paddd xmm4, xmm1)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 18)
|
||||
a2(psrld xmm5, 14)
|
||||
a2(pxor xmm0, xmm4)
|
||||
a2(movdqa xmm4, xmm3)
|
||||
a2(pxor xmm0, xmm5)
|
||||
a3(pshufd xmm1, xmm1, 0x39)
|
||||
a2(paddd xmm4, xmm0)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 7)
|
||||
a2(psrld xmm5, 25)
|
||||
a2(pxor xmm1, xmm4)
|
||||
a2(movdqa xmm4, xmm0)
|
||||
a2(pxor xmm1, xmm5)
|
||||
a2(paddd xmm4, xmm1)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 9)
|
||||
a2(psrld xmm5, 23)
|
||||
a2(pxor xmm2, xmm4)
|
||||
a2(movdqa xmm4, xmm1)
|
||||
a2(pxor xmm2, xmm5)
|
||||
a3(pshufd xmm1, xmm1, 0x93)
|
||||
a2(paddd xmm4, xmm2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 13)
|
||||
a2(psrld xmm5, 19)
|
||||
a2(pxor xmm3, xmm4)
|
||||
a2(movdqa xmm4, xmm2)
|
||||
a2(pxor xmm3, xmm5)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a2(paddd xmm4, xmm3)
|
||||
/* counter update is issued early; the SSE instructions that follow do not touch the flags the ja reads */
a2(sub rax, 2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 18)
|
||||
a2(psrld xmm5, 14)
|
||||
a2(pxor xmm0, xmm4)
|
||||
a3(pshufd xmm3, xmm3, 0x39)
|
||||
a2(pxor xmm0, xmm5)
|
||||
aj(ja scrypt_salsa_sse2_loop)
|
||||
/* feed-forward: X += saved core input */
a2(paddd xmm0,xmm8)
|
||||
a2(paddd xmm1,xmm9)
|
||||
a2(paddd xmm2,xmm10)
|
||||
a2(paddd xmm3,xmm11)
|
||||
/* rax = Bout + ((r8 + r9) & ~0x7f) / 2: even blocks land in the first half of Bout, odd blocks in the second */
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
/* end-of-chunk test; the stores below do not modify flags before the jne */
a2(cmp r9,rcx)
|
||||
a2(movdqa [rax+0],xmm0)
|
||||
a2(movdqa [rax+16],xmm1)
|
||||
a2(movdqa [rax+32],xmm2)
|
||||
a2(movdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_sse2_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_sse2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA_SSE2
|
||||
|
||||
static void NOINLINE asm_calling_convention
|
||||
scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
x4 = x1;
|
||||
x4 = _mm_add_epi32(x4, x0);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 7);
|
||||
x5 = _mm_srli_epi32(x5, 25);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = x0;
|
||||
x3 = _mm_xor_si128(x3, x5);
|
||||
x4 = _mm_add_epi32(x4, x3);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 9);
|
||||
x5 = _mm_srli_epi32(x5, 23);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = x3;
|
||||
x2 = _mm_xor_si128(x2, x5);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x93);
|
||||
x4 = _mm_add_epi32(x4, x2);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 13);
|
||||
x5 = _mm_srli_epi32(x5, 19);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = x2;
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x4 = _mm_add_epi32(x4, x1);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 18);
|
||||
x5 = _mm_srli_epi32(x5, 14);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x4 = x3;
|
||||
x0 = _mm_xor_si128(x0, x5);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x39);
|
||||
x4 = _mm_add_epi32(x4, x0);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 7);
|
||||
x5 = _mm_srli_epi32(x5, 25);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = x0;
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x4 = _mm_add_epi32(x4, x1);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 9);
|
||||
x5 = _mm_srli_epi32(x5, 23);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = x1;
|
||||
x2 = _mm_xor_si128(x2, x5);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x93);
|
||||
x4 = _mm_add_epi32(x4, x2);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 13);
|
||||
x5 = _mm_srli_epi32(x5, 19);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = x2;
|
||||
x3 = _mm_xor_si128(x3, x5);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x4 = _mm_add_epi32(x4, x3);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 18);
|
||||
x5 = _mm_srli_epi32(x5, 14);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x39);
|
||||
x0 = _mm_xor_si128(x0, x5);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi32(x0, t0);
|
||||
x1 = _mm_add_epi32(x1, t1);
|
||||
x2 = _mm_add_epi32(x2, t2);
|
||||
x3 = _mm_add_epi32(x3, t3);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA_SSE2)
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa/8-SSE2"
|
||||
#undef SCRYPT_SALSA_INCLUDED
|
||||
#define SCRYPT_SALSA_INCLUDED
|
||||
#endif
|
||||
|
||||
/* used by avx,etc as well */
|
||||
#if defined(SCRYPT_SALSA_INCLUDED)
|
||||
/*
|
||||
Default layout:
|
||||
0 1 2 3
|
||||
4 5 6 7
|
||||
8 9 10 11
|
||||
12 13 14 15
|
||||
|
||||
SSE2 layout:
|
||||
0 5 10 15
|
||||
12 1 6 11
|
||||
8 13 2 7
|
||||
4 9 14 3
|
||||
*/
|
||||
|
||||
/*
 * Convert `count` consecutive 16-word blocks between the default word order
 * and the SSE2 word order, in place. The conversion is a fixed set of six
 * pairwise swaps per block, so applying it twice restores the original order.
 */
static void asm_calling_convention
salsa_core_tangle_sse2(uint32_t *blocks, size_t count) {
	/* word positions exchanged inside every block */
	static const unsigned char swapPairs[6][2] = {
		{1, 5}, {2, 10}, {3, 15}, {4, 12}, {7, 11}, {9, 13}
	};
	size_t blockNo, pair;
	uint32_t held;

	for (blockNo = 0; blockNo < count; blockNo++, blocks += 16) {
		for (pair = 0; pair < 6; pair++) {
			const unsigned char lo = swapPairs[pair][0];
			const unsigned char hi = swapPairs[pair][1];
			held = blocks[lo];
			blocks[lo] = blocks[hi];
			blocks[hi] = held;
		}
	}
}
|
||||
#endif
|
||||
|
||||
317
vendor/scrypt-jane/code/scrypt-jane-mix_salsa-xop.h
vendored
317
vendor/scrypt-jane/code/scrypt-jane-mix_salsa-xop.h
vendored
|
|
@ -1,317 +0,0 @@
|
|||
/* x86 */
|
||||
#if defined(X86ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_XOP
|
||||
|
||||
/*
 * scrypt_ChunkMix_xop, 32-bit x86 assembly variant: mixes a chunk of 2*r
 * 64-byte blocks with an 8-round Salsa core, using XOP vprotd for the rotates.
 *
 * After the four pushes below the arguments are read from the stack in
 * prototype order: [ebp+20] -> edi (Bout), [ebp+24] -> esi (Bin),
 * [ebp+28] -> eax (Bxor, may be NULL), [ebp+32] -> ebx (r).
 *
 * NOTE(review): a1/a2/a3/aj/aret and asm_naked_fn* are macros defined outside
 * this view; they presumably wrap one instruction each -- confirm.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_xop)
|
||||
a1(push ebx)
|
||||
a1(push edi)
|
||||
a1(push esi)
|
||||
a1(push ebp)
|
||||
a2(mov ebp,esp)
|
||||
a2(mov edi,[ebp+20])
|
||||
a2(mov esi,[ebp+24])
|
||||
a2(mov eax,[ebp+28])
|
||||
a2(mov ebx,[ebp+32])
|
||||
/* carve out a 64-byte-aligned scratch area; [esp+0] and [esp+16] hold the saved xmm0/xmm1 later on */
a2(sub esp,32)
|
||||
a2(and esp,~63)
|
||||
/* edx = r * 2 * 64 = chunk size in bytes; ecx = byte offset of the last block */
a2(lea edx,[ebx*2])
|
||||
a2(shl edx,6)
|
||||
a2(lea ecx,[edx-64])
|
||||
/* ZF is set here when Bxor is NULL; the loads that follow leave the flags alone, so the jz tests this */
a2(and eax, eax)
|
||||
a2(movdqa xmm0,[ecx+esi+0])
|
||||
a2(movdqa xmm1,[ecx+esi+16])
|
||||
a2(movdqa xmm2,[ecx+esi+32])
|
||||
a2(movdqa xmm3,[ecx+esi+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[ecx+eax+0])
|
||||
a3(vpxor xmm1,xmm1,[ecx+eax+16])
|
||||
a3(vpxor xmm2,xmm2,[ecx+eax+32])
|
||||
a3(vpxor xmm3,xmm3,[ecx+eax+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor1:)
|
||||
/* ecx = byte offset of the current input block; ebx alternates between 0 and edx on successive blocks */
a2(xor ecx,ecx)
|
||||
a2(xor ebx,ebx)
|
||||
a1(scrypt_ChunkMix_xop_loop:)
|
||||
/* X ^= Bin block (and the Bxor block when Bxor is non-NULL) */
a2(and eax, eax)
|
||||
a3(vpxor xmm0,xmm0,[esi+ecx+0])
|
||||
a3(vpxor xmm1,xmm1,[esi+ecx+16])
|
||||
a3(vpxor xmm2,xmm2,[esi+ecx+32])
|
||||
a3(vpxor xmm3,xmm3,[esi+ecx+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[eax+ecx+0])
|
||||
a3(vpxor xmm1,xmm1,[eax+ecx+16])
|
||||
a3(vpxor xmm2,xmm2,[eax+ecx+32])
|
||||
a3(vpxor xmm3,xmm3,[eax+ecx+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor2:)
|
||||
/* keep a copy of the core input (two rows on the stack, two in xmm6/xmm7) for the feed-forward add */
a2(vmovdqa [esp+0],xmm0)
|
||||
a2(vmovdqa [esp+16],xmm1)
|
||||
a2(vmovdqa xmm6,xmm2)
|
||||
a2(vmovdqa xmm7,xmm3)
|
||||
/* eax is reused as the round counter: 8 rounds, two per loop iteration */
a2(mov eax,8)
|
||||
a1(scrypt_salsa_xop_loop: )
|
||||
a3(vpaddd xmm4, xmm1, xmm0)
|
||||
a3(vprotd xmm4, xmm4, 7)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm3)
|
||||
a3(vprotd xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm2)
|
||||
a3(vprotd xmm4, xmm4, 13)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm1)
|
||||
a3(pshufd xmm3, xmm3, 0x93)
|
||||
a3(vprotd xmm4, xmm4, 18)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(pshufd xmm1, xmm1, 0x39)
|
||||
a3(vpaddd xmm4, xmm3, xmm0)
|
||||
a3(vprotd xmm4, xmm4, 7)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm1)
|
||||
a3(vprotd xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm1, xmm2)
|
||||
a3(vprotd xmm4, xmm4, 13)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(pshufd xmm1, xmm1, 0x93)
|
||||
a3(vpaddd xmm4, xmm2, xmm3)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a3(vprotd xmm4, xmm4, 18)
|
||||
a3(pshufd xmm3, xmm3, 0x39)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a2(sub eax, 2)
|
||||
aj(ja scrypt_salsa_xop_loop)
|
||||
/* feed-forward: add the saved core input back in */
a3(vpaddd xmm0,xmm0,[esp+0])
|
||||
a3(vpaddd xmm1,xmm1,[esp+16])
|
||||
a3(vpaddd xmm2,xmm2,xmm6)
|
||||
a3(vpaddd xmm3,xmm3,xmm7)
|
||||
/*
 * Output address = edi + (((ebx + ecx) & ~0x7f) >> 1). For input block i this
 * is output block i/2 when i is even and r + i/2 when i is odd.
 */
a2(lea eax,[ebx+ecx])
|
||||
a2(xor ebx,edx)
|
||||
a2(and eax,~0x7f)
|
||||
a2(add ecx,64)
|
||||
a2(shr eax,1)
|
||||
a2(add eax, edi)
|
||||
/* flags from this compare are consumed by the jne below; the stores and the mov in between do not touch them */
a2(cmp ecx,edx)
|
||||
a2(vmovdqa [eax+0],xmm0)
|
||||
a2(vmovdqa [eax+16],xmm1)
|
||||
a2(vmovdqa [eax+32],xmm2)
|
||||
a2(vmovdqa [eax+48],xmm3)
|
||||
/* eax was used as scratch above, so reload the Bxor pointer for the next iteration */
a2(mov eax,[ebp+28])
|
||||
aj(jne scrypt_ChunkMix_xop_loop)
|
||||
/* drop the aligned scratch area and restore the saved registers */
a2(mov esp,ebp)
|
||||
a1(pop ebp)
|
||||
a1(pop esi)
|
||||
a1(pop edi)
|
||||
a1(pop ebx)
|
||||
aret(16)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_xop)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_XOP
|
||||
|
||||
/*
 * scrypt_ChunkMix_xop, x86-64 assembly variant: mixes a chunk of 2*r 64-byte
 * blocks with an 8-round Salsa core, using XOP vprotd for the rotates.
 *
 * Register use as seen in the body: rdi = output base (Bout), rsi = input
 * base (Bin), rdx = optional xor source (Bxor, tested for NULL), ecx = r.
 * NOTE(review): how asm_naked_fn maps the platform calling convention onto
 * these registers is defined outside this view -- confirm.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_xop)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
/* rcx = r * 2 * 64 = chunk size in bytes; r9 = byte offset of the last block */
a2(shl rcx,6)
|
||||
a2(lea r9,[rcx-64])
|
||||
/* rax -> last block of Bin, r9 -> last block of Bxor */
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* ZF is set here when Bxor is NULL; the loads that follow leave the flags alone, so the jz tests this */
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor1:)
|
||||
/* r9 = byte offset of the current input block; r8 alternates between 0 and rcx on successive blocks */
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_xop_loop:)
|
||||
/* X ^= Bin block (and the Bxor block when Bxor is non-NULL) */
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor2:)
|
||||
/* keep a copy of the core input in xmm8..xmm11 for the feed-forward add */
a2(vmovdqa xmm8,xmm0)
|
||||
a2(vmovdqa xmm9,xmm1)
|
||||
a2(vmovdqa xmm10,xmm2)
|
||||
a2(vmovdqa xmm11,xmm3)
|
||||
/* rax = round counter: 8 rounds, two per loop iteration */
a2(mov rax,8)
|
||||
a1(scrypt_salsa_xop_loop: )
|
||||
a3(vpaddd xmm4, xmm1, xmm0)
|
||||
a3(vprotd xmm4, xmm4, 7)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm3)
|
||||
a3(vprotd xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm2)
|
||||
a3(vprotd xmm4, xmm4, 13)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm1)
|
||||
a3(pshufd xmm3, xmm3, 0x93)
|
||||
a3(vprotd xmm4, xmm4, 18)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(pshufd xmm1, xmm1, 0x39)
|
||||
a3(vpaddd xmm4, xmm3, xmm0)
|
||||
a3(vprotd xmm4, xmm4, 7)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm1)
|
||||
a3(vprotd xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm1, xmm2)
|
||||
a3(vprotd xmm4, xmm4, 13)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(pshufd xmm1, xmm1, 0x93)
|
||||
a3(vpaddd xmm4, xmm2, xmm3)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a3(vprotd xmm4, xmm4, 18)
|
||||
a3(pshufd xmm3, xmm3, 0x39)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a2(sub rax, 2)
|
||||
aj(ja scrypt_salsa_xop_loop)
|
||||
/* feed-forward: add the saved core input back in */
a3(vpaddd xmm0,xmm0,xmm8)
|
||||
a3(vpaddd xmm1,xmm1,xmm9)
|
||||
a3(vpaddd xmm2,xmm2,xmm10)
|
||||
a3(vpaddd xmm3,xmm3,xmm11)
|
||||
/*
 * Output address = rdi + (((r8 + r9) & ~0x7f) >> 1). For input block i this
 * is output block i/2 when i is even and r + i/2 when i is odd.
 */
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
/* flags from this compare are consumed by the jne below; the stores in between do not touch them */
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_xop_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_xop)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA_XOP
|
||||
|
||||
static void asm_calling_convention NOINLINE
|
||||
scrypt_ChunkMix_xop(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
x4 = _mm_add_epi32(x1, x0);
|
||||
x4 = _mm_roti_epi32(x4, 7);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = _mm_add_epi32(x0, x3);
|
||||
x4 = _mm_roti_epi32(x4, 9);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = _mm_add_epi32(x3, x2);
|
||||
x4 = _mm_roti_epi32(x4, 13);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = _mm_add_epi32(x2, x1);
|
||||
x4 = _mm_roti_epi32(x4, 18);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x93);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x39);
|
||||
x4 = _mm_add_epi32(x3, x0);
|
||||
x4 = _mm_roti_epi32(x4, 7);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = _mm_add_epi32(x0, x1);
|
||||
x4 = _mm_roti_epi32(x4, 9);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = _mm_add_epi32(x1, x2);
|
||||
x4 = _mm_roti_epi32(x4, 13);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = _mm_add_epi32(x2, x3);
|
||||
x4 = _mm_roti_epi32(x4, 18);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x93);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x39);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi32(x0, t0);
|
||||
x1 = _mm_add_epi32(x1, t1);
|
||||
x2 = _mm_add_epi32(x2, t2);
|
||||
x3 = _mm_add_epi32(x3, t3);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA_XOP)
|
||||
/* uses salsa_core_tangle_sse2 */
|
||||
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa/8-XOP"
|
||||
#undef SCRYPT_SALSA_INCLUDED
|
||||
#define SCRYPT_SALSA_INCLUDED
|
||||
#endif
|
||||
70
vendor/scrypt-jane/code/scrypt-jane-mix_salsa.h
vendored
70
vendor/scrypt-jane/code/scrypt-jane-mix_salsa.h
vendored
|
|
@ -1,70 +0,0 @@
|
|||
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)
|
||||
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa20/8 Ref"
|
||||
|
||||
#undef SCRYPT_SALSA_INCLUDED
|
||||
#define SCRYPT_SALSA_INCLUDED
|
||||
#define SCRYPT_SALSA_BASIC
|
||||
|
||||
/*
 * Portable reference Salsa20/8 core. Runs 8 rounds (four column/row double
 * rounds) over a working copy of the 16-word state, then adds the working
 * copy back into `state` in place.
 */
static void
salsa_core_basic(uint32_t state[16]) {
	uint32_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3];
	uint32_t s4 = state[4], s5 = state[5], s6 = state[6], s7 = state[7];
	uint32_t s8 = state[8], s9 = state[9], s10 = state[10], s11 = state[11];
	uint32_t s12 = state[12], s13 = state[13], s14 = state[14], s15 = state[15];
	uint32_t sum;
	size_t doubleRound;

	/* one quarter-round: b, c, d, a are updated in that order with rotates of 7, 9, 13, 18 */
	#define SALSA_QUARTER(a,b,c,d) do { \
		sum = a+d; sum = ROTL32(sum,  7); b ^= sum; \
		sum = b+a; sum = ROTL32(sum,  9); c ^= sum; \
		sum = c+b; sum = ROTL32(sum, 13); d ^= sum; \
		sum = d+c; sum = ROTL32(sum, 18); a ^= sum; \
	} while (0)

	for (doubleRound = 0; doubleRound < 4; doubleRound++) {
		/* first four quarter-rounds of the double round */
		SALSA_QUARTER( s0, s4, s8,s12);
		SALSA_QUARTER( s5, s9,s13, s1);
		SALSA_QUARTER(s10,s14, s2, s6);
		SALSA_QUARTER(s15, s3, s7,s11);
		/* second four quarter-rounds of the double round */
		SALSA_QUARTER( s0, s1, s2, s3);
		SALSA_QUARTER( s5, s6, s7, s4);
		SALSA_QUARTER(s10,s11, s8, s9);
		SALSA_QUARTER(s15,s12,s13,s14);
	}

	#undef SALSA_QUARTER

	/* feed-forward: state += mixed copy */
	state[0] += s0;
	state[1] += s1;
	state[2] += s2;
	state[3] += s3;
	state[4] += s4;
	state[5] += s5;
	state[6] += s6;
	state[7] += s7;
	state[8] += s8;
	state[9] += s9;
	state[10] += s10;
	state[11] += s11;
	state[12] += s12;
	state[13] += s13;
	state[14] += s14;
	state[15] += s15;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -1,367 +0,0 @@
|
|||
/* x64 */
|
||||
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_AVX
|
||||
|
||||
/*
 * scrypt_ChunkMix_avx, x86-64 assembly variant of the 64-bit-word Salsa mix:
 * processes a chunk of 2*r 128-byte blocks, each held in xmm0..xmm7.
 *
 * Register use as seen in the body: rdi = output base (Bout), rsi = input
 * base (Bin), rdx = optional xor source (Bxor, tested for NULL), ecx = r.
 * NOTE(review): a1/a2/a3/a4/aj and asm_naked_fn* are macros defined outside
 * this view, as is the mapping of the calling convention onto these
 * registers -- confirm.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx)
|
||||
/* set up a frame with a 64-byte-aligned, 128-byte scratch area that later holds the saved core input */
a1(push rbp)
|
||||
a2(mov rbp, rsp)
|
||||
a2(and rsp, ~63)
|
||||
a2(sub rsp, 128)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
/* rcx = r * 2 * 128 = chunk size in bytes; r9 = byte offset of the last block */
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
/* rax -> last block of Bin, r9 -> last block of Bxor */
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* ZF is set here when Bxor is NULL; the loads that follow leave the flags alone, so the jz tests this */
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
a2(vmovdqa xmm4,[rax+64])
|
||||
a2(vmovdqa xmm5,[rax+80])
|
||||
a2(vmovdqa xmm6,[rax+96])
|
||||
a2(vmovdqa xmm7,[rax+112])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a3(vpxor xmm4,xmm4,[r9+64])
|
||||
a3(vpxor xmm5,xmm5,[r9+80])
|
||||
a3(vpxor xmm6,xmm6,[r9+96])
|
||||
a3(vpxor xmm7,xmm7,[r9+112])
|
||||
a1(scrypt_ChunkMix_avx_no_xor1:)
|
||||
/* r9 = byte offset of the current input block; r8 alternates between 0 and rcx on successive blocks */
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_avx_loop:)
|
||||
/* X ^= Bin block (and the Bxor block when Bxor is non-NULL) */
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
a3(vpxor xmm4,xmm4,[rsi+r9+64])
|
||||
a3(vpxor xmm5,xmm5,[rsi+r9+80])
|
||||
a3(vpxor xmm6,xmm6,[rsi+r9+96])
|
||||
a3(vpxor xmm7,xmm7,[rsi+r9+112])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a3(vpxor xmm4,xmm4,[rdx+r9+64])
|
||||
a3(vpxor xmm5,xmm5,[rdx+r9+80])
|
||||
a3(vpxor xmm6,xmm6,[rdx+r9+96])
|
||||
a3(vpxor xmm7,xmm7,[rdx+r9+112])
|
||||
a1(scrypt_ChunkMix_avx_no_xor2:)
|
||||
/* save the core input on the stack for the feed-forward add */
a2(vmovdqa [rsp+0],xmm0)
|
||||
a2(vmovdqa [rsp+16],xmm1)
|
||||
a2(vmovdqa [rsp+32],xmm2)
|
||||
a2(vmovdqa [rsp+48],xmm3)
|
||||
a2(vmovdqa [rsp+64],xmm4)
|
||||
a2(vmovdqa [rsp+80],xmm5)
|
||||
a2(vmovdqa [rsp+96],xmm6)
|
||||
a2(vmovdqa [rsp+112],xmm7)
|
||||
/* rax = round counter: 8 rounds, two per loop iteration */
a2(mov rax,8)
|
||||
a1(scrypt_salsa64_avx_loop: )
|
||||
/*
 * Each step adds two rows with vpaddq, rotates the 64-bit sums and XORs them
 * into a third row. vpshufd 0xb1 swaps the 32-bit halves of each 64-bit word;
 * the vpsrlq/vpsllq pairs (51/13 and 25/39) form the other two rotates.
 */
a3(vpaddq xmm8, xmm0, xmm2)
|
||||
a3(vpaddq xmm9, xmm1, xmm3)
|
||||
a3(vpshufd xmm8, xmm8, 0xb1)
|
||||
a3(vpshufd xmm9, xmm9, 0xb1)
|
||||
a3(vpxor xmm6, xmm6, xmm8)
|
||||
a3(vpxor xmm7, xmm7, xmm9)
|
||||
a3(vpaddq xmm10, xmm0, xmm6)
|
||||
a3(vpaddq xmm11, xmm1, xmm7)
|
||||
a3(vpsrlq xmm8, xmm10, 51)
|
||||
a3(vpsrlq xmm9, xmm11, 51)
|
||||
a3(vpsllq xmm10, xmm10, 13)
|
||||
a3(vpsllq xmm11, xmm11, 13)
|
||||
a3(vpxor xmm4, xmm4, xmm8)
|
||||
a3(vpxor xmm5, xmm5, xmm9)
|
||||
a3(vpxor xmm4, xmm4, xmm10)
|
||||
a3(vpxor xmm5, xmm5, xmm11)
|
||||
a3(vpaddq xmm8, xmm6, xmm4)
|
||||
a3(vpaddq xmm9, xmm7, xmm5)
|
||||
a3(vpsrlq xmm10, xmm8, 25)
|
||||
a3(vpsrlq xmm11, xmm9, 25)
|
||||
a3(vpsllq xmm8, xmm8, 39)
|
||||
a3(vpsllq xmm9, xmm9, 39)
|
||||
a3(vpxor xmm2, xmm2, xmm10)
|
||||
a3(vpxor xmm3, xmm3, xmm11)
|
||||
a3(vpxor xmm2, xmm2, xmm8)
|
||||
a3(vpxor xmm3, xmm3, xmm9)
|
||||
a3(vpaddq xmm10, xmm4, xmm2)
|
||||
a3(vpaddq xmm11, xmm5, xmm3)
|
||||
a3(vpshufd xmm10, xmm10, 0xb1)
|
||||
a3(vpshufd xmm11, xmm11, 0xb1)
|
||||
a3(vpxor xmm0, xmm0, xmm10)
|
||||
a3(vpxor xmm1, xmm1, xmm11)
|
||||
/* re-arrange the 64-bit words of rows 2/3 and 6/7 between the two rounds of the pair */
a2(vmovdqa xmm8, xmm2)
|
||||
a2(vmovdqa xmm9, xmm3)
|
||||
a4(vpalignr xmm2, xmm6, xmm7, 8)
|
||||
a4(vpalignr xmm3, xmm7, xmm6, 8)
|
||||
a4(vpalignr xmm6, xmm9, xmm8, 8)
|
||||
a4(vpalignr xmm7, xmm8, xmm9, 8)
|
||||
a3(vpaddq xmm10, xmm0, xmm2)
|
||||
a3(vpaddq xmm11, xmm1, xmm3)
|
||||
a3(vpshufd xmm10, xmm10, 0xb1)
|
||||
a3(vpshufd xmm11, xmm11, 0xb1)
|
||||
a3(vpxor xmm6, xmm6, xmm10)
|
||||
a3(vpxor xmm7, xmm7, xmm11)
|
||||
a3(vpaddq xmm8, xmm0, xmm6)
|
||||
a3(vpaddq xmm9, xmm1, xmm7)
|
||||
a3(vpsrlq xmm10, xmm8, 51)
|
||||
a3(vpsrlq xmm11, xmm9, 51)
|
||||
a3(vpsllq xmm8, xmm8, 13)
|
||||
a3(vpsllq xmm9, xmm9, 13)
|
||||
a3(vpxor xmm5, xmm5, xmm10)
|
||||
a3(vpxor xmm4, xmm4, xmm11)
|
||||
a3(vpxor xmm5, xmm5, xmm8)
|
||||
a3(vpxor xmm4, xmm4, xmm9)
|
||||
a3(vpaddq xmm10, xmm6, xmm5)
|
||||
a3(vpaddq xmm11, xmm7, xmm4)
|
||||
a3(vpsrlq xmm8, xmm10, 25)
|
||||
a3(vpsrlq xmm9, xmm11, 25)
|
||||
a3(vpsllq xmm10, xmm10, 39)
|
||||
a3(vpsllq xmm11, xmm11, 39)
|
||||
a3(vpxor xmm2, xmm2, xmm8)
|
||||
a3(vpxor xmm3, xmm3, xmm9)
|
||||
a3(vpxor xmm2, xmm2, xmm10)
|
||||
a3(vpxor xmm3, xmm3, xmm11)
|
||||
a3(vpaddq xmm8, xmm5, xmm2)
|
||||
a3(vpaddq xmm9, xmm4, xmm3)
|
||||
a3(vpshufd xmm8, xmm8, 0xb1)
|
||||
a3(vpshufd xmm9, xmm9, 0xb1)
|
||||
a3(vpxor xmm0, xmm0, xmm8)
|
||||
a3(vpxor xmm1, xmm1, xmm9)
|
||||
a2(vmovdqa xmm10, xmm2)
|
||||
a2(vmovdqa xmm11, xmm3)
|
||||
a4(vpalignr xmm2, xmm6, xmm7, 8)
|
||||
a4(vpalignr xmm3, xmm7, xmm6, 8)
|
||||
a4(vpalignr xmm6, xmm11, xmm10, 8)
|
||||
a4(vpalignr xmm7, xmm10, xmm11, 8)
|
||||
a2(sub rax, 2)
|
||||
aj(ja scrypt_salsa64_avx_loop)
|
||||
/* feed-forward: add the saved core input back in */
a3(vpaddq xmm0,xmm0,[rsp+0])
|
||||
a3(vpaddq xmm1,xmm1,[rsp+16])
|
||||
a3(vpaddq xmm2,xmm2,[rsp+32])
|
||||
a3(vpaddq xmm3,xmm3,[rsp+48])
|
||||
a3(vpaddq xmm4,xmm4,[rsp+64])
|
||||
a3(vpaddq xmm5,xmm5,[rsp+80])
|
||||
a3(vpaddq xmm6,xmm6,[rsp+96])
|
||||
a3(vpaddq xmm7,xmm7,[rsp+112])
|
||||
/*
 * Output address = rdi + (((r8 + r9) & ~0xff) >> 1). For input block i this
 * is output block i/2 when i is even and r + i/2 when i is odd.
 */
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
/* flags from this compare are consumed by the jne below; the stores in between do not touch them */
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
a2(vmovdqa [rax+64],xmm4)
|
||||
a2(vmovdqa [rax+80],xmm5)
|
||||
a2(vmovdqa [rax+96],xmm6)
|
||||
a2(vmovdqa [rax+112],xmm7)
|
||||
aj(jne scrypt_ChunkMix_avx_loop)
|
||||
/* release the aligned scratch area and restore the caller's frame */
a2(mov rsp, rbp)
|
||||
a1(pop rbp)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA64_AVX
|
||||
|
||||
static void asm_calling_convention
|
||||
scrypt_ChunkMix_avx(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
x4 = xmmp[4];
|
||||
x5 = xmmp[5];
|
||||
x6 = xmmp[6];
|
||||
x7 = xmmp[7];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
t4 = x4;
|
||||
t5 = x5;
|
||||
t6 = x6;
|
||||
t7 = x7;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x4 = _mm_xor_si128(x4, z2);
|
||||
x5 = _mm_xor_si128(x5, z3);
|
||||
x4 = _mm_xor_si128(x4, z0);
|
||||
x5 = _mm_xor_si128(x5, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x4, x6);
|
||||
z1 = _mm_add_epi64(x5, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x4);
|
||||
z1 = _mm_add_epi64(x3, x5);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x5 = _mm_xor_si128(x5, z2);
|
||||
x4 = _mm_xor_si128(x4, z3);
|
||||
x5 = _mm_xor_si128(x5, z0);
|
||||
x4 = _mm_xor_si128(x4, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x5, x6);
|
||||
z1 = _mm_add_epi64(x4, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x5);
|
||||
z1 = _mm_add_epi64(x3, x4);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi64(x0, t0);
|
||||
x1 = _mm_add_epi64(x1, t1);
|
||||
x2 = _mm_add_epi64(x2, t2);
|
||||
x3 = _mm_add_epi64(x3, t3);
|
||||
x4 = _mm_add_epi64(x4, t4);
|
||||
x5 = _mm_add_epi64(x5, t5);
|
||||
x6 = _mm_add_epi64(x6, t6);
|
||||
x7 = _mm_add_epi64(x7, t7);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
xmmp[4] = x4;
|
||||
xmmp[5] = x5;
|
||||
xmmp[6] = x6;
|
||||
xmmp[7] = x7;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX)
|
||||
/* uses salsa64_core_tangle_sse2 */
|
||||
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa64/8-AVX"
|
||||
#undef SCRYPT_SALSA64_INCLUDED
|
||||
#define SCRYPT_SALSA64_INCLUDED
|
||||
#endif
|
||||
|
|
@ -1,221 +0,0 @@
|
|||
/* x64 */
|
||||
#if defined(X86_64ASM_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_AVX2
|
||||
|
||||
/*
 * scrypt_ChunkMix_avx2, x86-64 assembly variant of the 64-bit-word Salsa mix:
 * processes a chunk of 2*r 128-byte blocks, each held in ymm0..ymm3.
 *
 * Register use as seen in the body: rdi = output base (Bout), rsi = input
 * base (Bin), rdx = optional xor source (Bxor, tested for NULL), ecx = r.
 * NOTE(review): a1/a2/a3/aj and asm_naked_fn* are macros defined outside this
 * view, as is the mapping of the calling convention onto these registers --
 * confirm.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_avx2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx2)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
/* rcx = r * 2 * 128 = chunk size in bytes; r9 = byte offset of the last block */
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
/* rax -> last block of Bin, r9 -> last block of Bxor */
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* ZF is set here when Bxor is NULL; the loads that follow leave the flags alone, so the jz tests this */
a2(and rdx, rdx)
|
||||
a2(vmovdqa ymm0,[rax+0])
|
||||
a2(vmovdqa ymm1,[rax+32])
|
||||
a2(vmovdqa ymm2,[rax+64])
|
||||
a2(vmovdqa ymm3,[rax+96])
|
||||
aj(jz scrypt_ChunkMix_avx2_no_xor1)
|
||||
a3(vpxor ymm0,ymm0,[r9+0])
|
||||
a3(vpxor ymm1,ymm1,[r9+32])
|
||||
a3(vpxor ymm2,ymm2,[r9+64])
|
||||
a3(vpxor ymm3,ymm3,[r9+96])
|
||||
a1(scrypt_ChunkMix_avx2_no_xor1:)
|
||||
/* r9 = byte offset of the current input block; r8 alternates between 0 and rcx on successive blocks */
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_avx2_loop:)
|
||||
/* X ^= Bin block (and the Bxor block when Bxor is non-NULL) */
a2(and rdx, rdx)
|
||||
a3(vpxor ymm0,ymm0,[rsi+r9+0])
|
||||
a3(vpxor ymm1,ymm1,[rsi+r9+32])
|
||||
a3(vpxor ymm2,ymm2,[rsi+r9+64])
|
||||
a3(vpxor ymm3,ymm3,[rsi+r9+96])
|
||||
aj(jz scrypt_ChunkMix_avx2_no_xor2)
|
||||
a3(vpxor ymm0,ymm0,[rdx+r9+0])
|
||||
a3(vpxor ymm1,ymm1,[rdx+r9+32])
|
||||
a3(vpxor ymm2,ymm2,[rdx+r9+64])
|
||||
a3(vpxor ymm3,ymm3,[rdx+r9+96])
|
||||
a1(scrypt_ChunkMix_avx2_no_xor2:)
|
||||
/* keep a copy of the core input in ymm6..ymm9 for the feed-forward add */
a2(vmovdqa ymm6,ymm0)
|
||||
a2(vmovdqa ymm7,ymm1)
|
||||
a2(vmovdqa ymm8,ymm2)
|
||||
a2(vmovdqa ymm9,ymm3)
|
||||
/* rax counts loop iterations: 4 passes, each containing two rounds */
a2(mov rax,4)
|
||||
a1(scrypt_salsa64_avx2_loop: )
|
||||
/*
 * Each step adds two rows with vpaddq, rotates the 64-bit sums and XORs them
 * into a third row. vpshufd 0xb1 swaps the 32-bit halves of each 64-bit word;
 * the vpsrlq/vpsllq pairs (51/13 and 25/39) form the other two rotates.
 * vpermq re-orders the four 64-bit words of a row between rounds, with ymm10
 * holding the permuted copy of ymm2 during the second round.
 */
a3(vpaddq ymm4, ymm1, ymm0)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpxor ymm3, ymm3, ymm4)
|
||||
a3(vpaddq ymm4, ymm0, ymm3)
|
||||
a3(vpsrlq ymm5, ymm4, 51)
|
||||
a3(vpxor ymm2, ymm2, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 13)
|
||||
a3(vpxor ymm2, ymm2, ymm4)
|
||||
a3(vpaddq ymm4, ymm3, ymm2)
|
||||
a3(vpsrlq ymm5, ymm4, 25)
|
||||
a3(vpxor ymm1, ymm1, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 39)
|
||||
a3(vpxor ymm1, ymm1, ymm4)
|
||||
a3(vpaddq ymm4, ymm2, ymm1)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpermq ymm1, ymm1, 0x39)
|
||||
a3(vpermq ymm10, ymm2, 0x4e)
|
||||
a3(vpxor ymm0, ymm0, ymm4)
|
||||
a3(vpermq ymm3, ymm3, 0x93)
|
||||
a3(vpaddq ymm4, ymm3, ymm0)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpxor ymm1, ymm1, ymm4)
|
||||
a3(vpaddq ymm4, ymm0, ymm1)
|
||||
a3(vpsrlq ymm5, ymm4, 51)
|
||||
a3(vpxor ymm10, ymm10, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 13)
|
||||
a3(vpxor ymm10, ymm10, ymm4)
|
||||
a3(vpaddq ymm4, ymm1, ymm10)
|
||||
a3(vpsrlq ymm5, ymm4, 25)
|
||||
a3(vpxor ymm3, ymm3, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 39)
|
||||
a3(vpermq ymm1, ymm1, 0x93)
|
||||
a3(vpxor ymm3, ymm3, ymm4)
|
||||
a3(vpermq ymm2, ymm10, 0x4e)
|
||||
a3(vpaddq ymm4, ymm10, ymm3)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpermq ymm3, ymm3, 0x39)
|
||||
a3(vpxor ymm0, ymm0, ymm4)
|
||||
a1(dec rax)
|
||||
aj(jnz scrypt_salsa64_avx2_loop)
|
||||
/* feed-forward: add the saved core input back in */
a3(vpaddq ymm0,ymm0,ymm6)
|
||||
a3(vpaddq ymm1,ymm1,ymm7)
|
||||
a3(vpaddq ymm2,ymm2,ymm8)
|
||||
a3(vpaddq ymm3,ymm3,ymm9)
|
||||
/*
 * Output address = rdi + (((r8 + r9) & ~0xff) >> 1). For input block i this
 * is output block i/2 when i is even and r + i/2 when i is odd.
 */
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
/* flags from this compare are consumed by the jne below; the stores in between do not touch them */
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],ymm0)
|
||||
a2(vmovdqa [rax+32],ymm1)
|
||||
a2(vmovdqa [rax+64],ymm2)
|
||||
a2(vmovdqa [rax+96],ymm3)
|
||||
aj(jne scrypt_ChunkMix_avx2_loop)
|
||||
/* clear the upper halves of the ymm registers before returning to the caller */
a1(vzeroupper)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))

#define SCRYPT_SALSA64_AVX2

/*
	scrypt ChunkMix with Salsa64/8 as the block hash, written with AVX2 intrinsics.

	Bout  destination chunk; receives 2*r blocks
	Bin   source chunk of 2*r blocks
	Bxor  optional second source chunk that is XORed into Bin block by block;
	      pass NULL to mix Bin alone
	r     block-count parameter; the chunk holds 2*r blocks

	Each block is handled as four ymmi vectors (y0..y3). Results for even i are
	written to Bout block i/2, results for odd i to Bout block r + i/2.

	NOTE(review): the blocks are presumably expected in the layout produced by
	salsa64_core_tangle_sse2 - confirm against the caller.
*/
static void asm_calling_convention
scrypt_ChunkMix_avx2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	uint32_t i, blocksPerChunk = r * 2, half = 0;
	ymmi *ymmp,y0,y1,y2,y3,t0,t1,t2,t3,z0,z1;
	size_t rounds;

	/* 1: X = B_{2r - 1} */
	ymmp = (ymmi *)scrypt_block(Bin, blocksPerChunk - 1);
	y0 = ymmp[0];
	y1 = ymmp[1];
	y2 = ymmp[2];
	y3 = ymmp[3];

	if (Bxor) {
		ymmp = (ymmi *)scrypt_block(Bxor, blocksPerChunk - 1);
		y0 = _mm256_xor_si256(y0, ymmp[0]);
		y1 = _mm256_xor_si256(y1, ymmp[1]);
		y2 = _mm256_xor_si256(y2, ymmp[2]);
		y3 = _mm256_xor_si256(y3, ymmp[3]);
	}

	/* 2: for i = 0 to 2r - 1 do */
	for (i = 0; i < blocksPerChunk; i++, half ^= r) {
		/* 3: X = H(X ^ B_i) */
		ymmp = (ymmi *)scrypt_block(Bin, i);
		y0 = _mm256_xor_si256(y0, ymmp[0]);
		y1 = _mm256_xor_si256(y1, ymmp[1]);
		y2 = _mm256_xor_si256(y2, ymmp[2]);
		y3 = _mm256_xor_si256(y3, ymmp[3]);

		if (Bxor) {
			ymmp = (ymmi *)scrypt_block(Bxor, i);
			y0 = _mm256_xor_si256(y0, ymmp[0]);
			y1 = _mm256_xor_si256(y1, ymmp[1]);
			y2 = _mm256_xor_si256(y2, ymmp[2]);
			y3 = _mm256_xor_si256(y3, ymmp[3]);
		}

		/* keep the hash input for the feed-forward add after the rounds */
		t0 = y0;
		t1 = y1;
		t2 = y2;
		t3 = y3;

		/* two Salsa64 rounds per iteration, 8 rounds in total */
		for (rounds = 8; rounds; rounds -= 2) {
			/* first round: y3 ^= rotl(y0 + y1, 32); the 32-bit pair swap is a 64-bit rotate by 32 */
			z0 = _mm256_add_epi64(y0, y1);
			z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
			y3 = _mm256_xor_si256(y3, z0);

			/* y2 ^= rotl(y3 + y0, 13) */
			z0 = _mm256_add_epi64(y3, y0);
			z1 = _mm256_srli_epi64(z0, 64-13);
			y2 = _mm256_xor_si256(y2, z1);
			z0 = _mm256_slli_epi64(z0, 13);
			y2 = _mm256_xor_si256(y2, z0);

			/* y1 ^= rotl(y2 + y3, 39) */
			z0 = _mm256_add_epi64(y2, y3);
			z1 = _mm256_srli_epi64(z0, 64-39);
			y1 = _mm256_xor_si256(y1, z1);
			z0 = _mm256_slli_epi64(z0, 39);
			y1 = _mm256_xor_si256(y1, z0);

			/*
				y0 ^= rotl(y1 + y2, 32)
				This must use y1/y2 BEFORE they are lane-rotated, so that every lane
				still lines up with the matching lane of y0. The previous code
				permuted first and therefore added lanes from different columns; the
				assembly implementation and the second round below both add first.
			*/
			z0 = _mm256_add_epi64(y1, y2);
			z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
			y0 = _mm256_xor_si256(y0, z0);

			/* rotate the 64-bit lanes of y1/y2/y3 for the second round */
			y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(0,3,2,1));
			y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2));
			y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(2,1,0,3));

			/* second round: same steps with the roles of y1 and y3 exchanged */
			z0 = _mm256_add_epi64(y0, y3);
			z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
			y1 = _mm256_xor_si256(y1, z0);

			z0 = _mm256_add_epi64(y1, y0);
			z1 = _mm256_srli_epi64(z0, 64-13);
			y2 = _mm256_xor_si256(y2, z1);
			z0 = _mm256_slli_epi64(z0, 13);
			y2 = _mm256_xor_si256(y2, z0);

			z0 = _mm256_add_epi64(y2, y1);
			z1 = _mm256_srli_epi64(z0, 64-39);
			y3 = _mm256_xor_si256(y3, z1);
			z0 = _mm256_slli_epi64(z0, 39);
			y3 = _mm256_xor_si256(y3, z0);

			z0 = _mm256_add_epi64(y3, y2);
			z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
			y0 = _mm256_xor_si256(y0, z0);

			/* undo the lane rotation so the next iteration starts from the original arrangement */
			y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(2,1,0,3));
			y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2));
			y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(0,3,2,1));
		}

		/* feed-forward: add the saved input back in */
		y0 = _mm256_add_epi64(y0, t0);
		y1 = _mm256_add_epi64(y1, t1);
		y2 = _mm256_add_epi64(y2, t2);
		y3 = _mm256_add_epi64(y3, t3);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		/* half alternates between 0 and r, so odd i lands in the upper half of Bout */
		ymmp = (ymmi *)scrypt_block(Bout, (i / 2) + half);
		ymmp[0] = y0;
		ymmp[1] = y1;
		ymmp[2] = y2;
		ymmp[3] = y3;
	}
}

#endif
|
||||
|
||||
/* Runs only when one of the AVX2 ChunkMix implementations above was compiled in. */
#if defined(SCRYPT_SALSA64_AVX2)
|
||||
/* uses salsa64_core_tangle_sse2 */
|
||||
|
||||
/* Replace whatever mix name an earlier variant set with the AVX2 one. */
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa64/8-AVX2"
|
||||
/*
	Mark that a Salsa64 implementation now exists. The implementation guards test
	this together with SCRYPT_CHOOSE_COMPILETIME to skip further variants.
*/
#undef SCRYPT_SALSA64_INCLUDED
|
||||
#define SCRYPT_SALSA64_INCLUDED
|
||||
#endif
|
||||
|
|
@ -1,449 +0,0 @@
|
|||
/* x64 */
|
||||
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_SSE2
|
||||
|
||||
/*
	scrypt_ChunkMix_sse2, x86-64 assembly version (Salsa64/8 block mix, SSE2).

	The code reads input blocks through rsi, tests rdx for zero before XORing a
	second input, writes results relative to rdi and derives the chunk size from
	ecx. That lines up with the (Bout, Bin, Bxor, r) prototype below.
	NOTE(review): the register assignment is presumably arranged by asm_naked_fn,
	which is not visible here - confirm, in particular for Win64.

	xmm0-xmm7 hold the 128-byte working block, xmm8-xmm11 are scratch.
	r9 is the byte offset of the current input block; r8 alternates between 0 and
	the chunk size and selects the lower or upper half of the output.
*/
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_sse2)
|
||||
/* prologue: save rbp, align rsp down to 64 bytes, reserve 128 bytes of scratch */
a1(push rbp)
|
||||
a2(mov rbp, rsp)
|
||||
a2(and rsp, ~63)
|
||||
a2(sub rsp, 128)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
/* rcx = (2*r) * 128 = chunk size in bytes */
a2(shl rcx,7)
|
||||
/* r9 = offset of the last block; rax -> last block of the rsi input, r9 -> last block of the rdx input */
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* sets ZF when rdx is zero; the movdqa loads below leave the flags alone, so the jz still sees it */
a2(and rdx, rdx)
|
||||
a2(movdqa xmm0,[rax+0])
|
||||
a2(movdqa xmm1,[rax+16])
|
||||
a2(movdqa xmm2,[rax+32])
|
||||
a2(movdqa xmm3,[rax+48])
|
||||
a2(movdqa xmm4,[rax+64])
|
||||
a2(movdqa xmm5,[rax+80])
|
||||
a2(movdqa xmm6,[rax+96])
|
||||
a2(movdqa xmm7,[rax+112])
|
||||
/* no second input: skip the XOR of its last block */
aj(jz scrypt_ChunkMix_sse2_no_xor1)
|
||||
a2(pxor xmm0,[r9+0])
|
||||
a2(pxor xmm1,[r9+16])
|
||||
a2(pxor xmm2,[r9+32])
|
||||
a2(pxor xmm3,[r9+48])
|
||||
a2(pxor xmm4,[r9+64])
|
||||
a2(pxor xmm5,[r9+80])
|
||||
a2(pxor xmm6,[r9+96])
|
||||
a2(pxor xmm7,[r9+112])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor1:)
|
||||
/* r9 = input byte offset (starts at 0), r8 = output half selector (starts at 0) */
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
/* outer loop: one pass per 128-byte block of the chunk */
a1(scrypt_ChunkMix_sse2_loop:)
|
||||
/* refresh ZF for the jz below; pxor does not change the flags */
a2(and rdx, rdx)
|
||||
/* state ^= input block at offset r9 */
a2(pxor xmm0,[rsi+r9+0])
|
||||
a2(pxor xmm1,[rsi+r9+16])
|
||||
a2(pxor xmm2,[rsi+r9+32])
|
||||
a2(pxor xmm3,[rsi+r9+48])
|
||||
a2(pxor xmm4,[rsi+r9+64])
|
||||
a2(pxor xmm5,[rsi+r9+80])
|
||||
a2(pxor xmm6,[rsi+r9+96])
|
||||
a2(pxor xmm7,[rsi+r9+112])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor2)
|
||||
/* state ^= matching block of the second input */
a2(pxor xmm0,[rdx+r9+0])
|
||||
a2(pxor xmm1,[rdx+r9+16])
|
||||
a2(pxor xmm2,[rdx+r9+32])
|
||||
a2(pxor xmm3,[rdx+r9+48])
|
||||
a2(pxor xmm4,[rdx+r9+64])
|
||||
a2(pxor xmm5,[rdx+r9+80])
|
||||
a2(pxor xmm6,[rdx+r9+96])
|
||||
a2(pxor xmm7,[rdx+r9+112])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor2:)
|
||||
/* save the hash input on the stack for the feed-forward add after the rounds */
a2(movdqa [rsp+0],xmm0)
|
||||
a2(movdqa [rsp+16],xmm1)
|
||||
a2(movdqa [rsp+32],xmm2)
|
||||
a2(movdqa [rsp+48],xmm3)
|
||||
a2(movdqa [rsp+64],xmm4)
|
||||
a2(movdqa [rsp+80],xmm5)
|
||||
a2(movdqa [rsp+96],xmm6)
|
||||
a2(movdqa [rsp+112],xmm7)
|
||||
/* rax = rounds left; each pass through the inner loop performs two */
a2(mov rax,8)
|
||||
a1(scrypt_salsa64_sse2_loop: )
|
||||
/* first round: xmm6:7 ^= rotl(xmm0:1 + xmm2:3, 32); pshufd 0xb1 swaps the 32-bit halves of each qword */
a2(movdqa xmm8, xmm0)
|
||||
a2(movdqa xmm9, xmm1)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm6, xmm8)
|
||||
a2(pxor xmm7, xmm9)
|
||||
/* xmm4:5 ^= rotl(xmm0:1 + xmm6:7, 13), built from >>51 and <<13 */
a2(movdqa xmm10, xmm0)
|
||||
a2(movdqa xmm11, xmm1)
|
||||
a2(paddq xmm10, xmm6)
|
||||
a2(paddq xmm11, xmm7)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 51)
|
||||
a2(psrlq xmm11, 51)
|
||||
a2(psllq xmm8, 13)
|
||||
a2(psllq xmm9, 13)
|
||||
a2(pxor xmm4, xmm10)
|
||||
a2(pxor xmm5, xmm11)
|
||||
a2(pxor xmm4, xmm8)
|
||||
a2(pxor xmm5, xmm9)
|
||||
/* xmm2:3 ^= rotl(xmm6:7 + xmm4:5, 39), built from >>25 and <<39 */
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(paddq xmm10, xmm4)
|
||||
a2(paddq xmm11, xmm5)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 25)
|
||||
a2(psrlq xmm11, 25)
|
||||
a2(psllq xmm8, 39)
|
||||
a2(psllq xmm9, 39)
|
||||
a2(pxor xmm2, xmm10)
|
||||
a2(pxor xmm3, xmm11)
|
||||
a2(pxor xmm2, xmm8)
|
||||
a2(pxor xmm3, xmm9)
|
||||
/* xmm0:1 ^= rotl(xmm4:5 + xmm2:3, 32) */
a2(movdqa xmm8, xmm4)
|
||||
a2(movdqa xmm9, xmm5)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm0, xmm8)
|
||||
a2(pxor xmm1, xmm9)
|
||||
/* re-arrange qwords for the next round: xmm2:3 are rebuilt from the old xmm6:7 and xmm6:7 from the old xmm2:3 */
a2(movdqa xmm8, xmm2)
|
||||
a2(movdqa xmm9, xmm3)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(movdqa xmm2, xmm7)
|
||||
a2(movdqa xmm3, xmm6)
|
||||
a2(punpcklqdq xmm10, xmm6)
|
||||
a2(punpcklqdq xmm11, xmm7)
|
||||
a2(movdqa xmm6, xmm8)
|
||||
a2(movdqa xmm7, xmm9)
|
||||
a2(punpcklqdq xmm9, xmm9)
|
||||
a2(punpcklqdq xmm8, xmm8)
|
||||
a2(punpckhqdq xmm2, xmm10)
|
||||
a2(punpckhqdq xmm3, xmm11)
|
||||
a2(punpckhqdq xmm6, xmm9)
|
||||
a2(punpckhqdq xmm7, xmm8)
|
||||
/* count two rounds; the flags set here are consumed by the ja at the bottom (only SSE instructions follow) */
a2(sub rax, 2)
|
||||
/* second round: same steps, but xmm5 and xmm4 trade places instead of being physically swapped */
a2(movdqa xmm8, xmm0)
|
||||
a2(movdqa xmm9, xmm1)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm6, xmm8)
|
||||
a2(pxor xmm7, xmm9)
|
||||
a2(movdqa xmm10, xmm0)
|
||||
a2(movdqa xmm11, xmm1)
|
||||
a2(paddq xmm10, xmm6)
|
||||
a2(paddq xmm11, xmm7)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 51)
|
||||
a2(psrlq xmm11, 51)
|
||||
a2(psllq xmm8, 13)
|
||||
a2(psllq xmm9, 13)
|
||||
a2(pxor xmm5, xmm10)
|
||||
a2(pxor xmm4, xmm11)
|
||||
a2(pxor xmm5, xmm8)
|
||||
a2(pxor xmm4, xmm9)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(paddq xmm10, xmm5)
|
||||
a2(paddq xmm11, xmm4)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 25)
|
||||
a2(psrlq xmm11, 25)
|
||||
a2(psllq xmm8, 39)
|
||||
a2(psllq xmm9, 39)
|
||||
a2(pxor xmm2, xmm10)
|
||||
a2(pxor xmm3, xmm11)
|
||||
a2(pxor xmm2, xmm8)
|
||||
a2(pxor xmm3, xmm9)
|
||||
a2(movdqa xmm8, xmm5)
|
||||
a2(movdqa xmm9, xmm4)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm0, xmm8)
|
||||
a2(pxor xmm1, xmm9)
|
||||
/* same qword re-arrangement as after the first round */
a2(movdqa xmm8, xmm2)
|
||||
a2(movdqa xmm9, xmm3)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(movdqa xmm2, xmm7)
|
||||
a2(movdqa xmm3, xmm6)
|
||||
a2(punpcklqdq xmm10, xmm6)
|
||||
a2(punpcklqdq xmm11, xmm7)
|
||||
a2(movdqa xmm6, xmm8)
|
||||
a2(movdqa xmm7, xmm9)
|
||||
a2(punpcklqdq xmm9, xmm9)
|
||||
a2(punpcklqdq xmm8, xmm8)
|
||||
a2(punpckhqdq xmm2, xmm10)
|
||||
a2(punpckhqdq xmm3, xmm11)
|
||||
a2(punpckhqdq xmm6, xmm9)
|
||||
a2(punpckhqdq xmm7, xmm8)
|
||||
/* loop while the sub rax,2 above left rax above zero */
aj(ja scrypt_salsa64_sse2_loop)
|
||||
/* feed-forward: add the saved input back in */
a2(paddq xmm0,[rsp+0])
|
||||
a2(paddq xmm1,[rsp+16])
|
||||
a2(paddq xmm2,[rsp+32])
|
||||
a2(paddq xmm3,[rsp+48])
|
||||
a2(paddq xmm4,[rsp+64])
|
||||
a2(paddq xmm5,[rsp+80])
|
||||
a2(paddq xmm6,[rsp+96])
|
||||
a2(paddq xmm7,[rsp+112])
|
||||
/*
	Output address: rax = rdi + (((r8 + r9) & ~0xff) >> 1).
	With r9 = i*128 and r8 = 0 or the chunk size this is block i/2 for even i and
	block r + i/2 for odd i. r8 is toggled and r9 advanced for the next pass.
*/
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
/* end-of-chunk test; the movdqa stores below do not disturb the flags before the jne */
a2(cmp r9,rcx)
|
||||
a2(movdqa [rax+0],xmm0)
|
||||
a2(movdqa [rax+16],xmm1)
|
||||
a2(movdqa [rax+32],xmm2)
|
||||
a2(movdqa [rax+48],xmm3)
|
||||
a2(movdqa [rax+64],xmm4)
|
||||
a2(movdqa [rax+80],xmm5)
|
||||
a2(movdqa [rax+96],xmm6)
|
||||
a2(movdqa [rax+112],xmm7)
|
||||
aj(jne scrypt_ChunkMix_sse2_loop)
|
||||
/* epilogue: drop the aligned scratch area and restore rbp */
a2(mov rsp, rbp)
|
||||
a1(pop rbp)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_sse2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))

#define SCRYPT_SALSA64_SSE2

/* XOR the eight vectors of the block at p into the working state x0..x7 */
#define SALSA64_SSE2_XOR_BLOCK(p) \
	do { \
		x0 = _mm_xor_si128(x0, (p)[0]); \
		x1 = _mm_xor_si128(x1, (p)[1]); \
		x2 = _mm_xor_si128(x2, (p)[2]); \
		x3 = _mm_xor_si128(x3, (p)[3]); \
		x4 = _mm_xor_si128(x4, (p)[4]); \
		x5 = _mm_xor_si128(x5, (p)[5]); \
		x6 = _mm_xor_si128(x6, (p)[6]); \
		x7 = _mm_xor_si128(x7, (p)[7]); \
	} while (0)

/* dst ^= rotl64(src, 32); swapping the 32-bit halves of a qword rotates it by 32 */
#define SALSA64_SSE2_XOR_ROT32(dst, src) \
	dst = _mm_xor_si128(dst, _mm_shuffle_epi32(src, _MM_SHUFFLE(2,3,0,1)))

/* dst ^= rotl64(src, n), applied as the right-shifted part first, then the left-shifted part */
#define SALSA64_SSE2_XOR_ROTL(dst, src, n) \
	do { \
		dst = _mm_xor_si128(dst, _mm_srli_epi64(src, 64-(n))); \
		dst = _mm_xor_si128(dst, _mm_slli_epi64(src, (n))); \
	} while (0)

/*
	scrypt ChunkMix with Salsa64/8 as the block hash, SSE2 intrinsics.

	Bout  destination chunk; receives 2*r blocks
	Bin   source chunk of 2*r blocks
	Bxor  optional second source chunk XORed into Bin block by block; may be NULL
	r     block-count parameter; the chunk holds 2*r blocks

	A block is eight xmmi vectors. Even-numbered results go to Bout block i/2,
	odd-numbered results to Bout block r + i/2.
*/
static void asm_calling_convention
scrypt_ChunkMix_sse2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	const uint32_t blocksPerChunk = r * 2;
	uint32_t i, half;
	xmmi *blk;
	xmmi x0,x1,x2,x3,x4,x5,x6,x7;
	xmmi in0,in1,in2,in3,in4,in5,in6,in7;
	xmmi sumA,sumB,old2,old3;
	size_t pass;

	/* 1: X = B_{2r - 1} */
	blk = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
	x0 = blk[0];
	x1 = blk[1];
	x2 = blk[2];
	x3 = blk[3];
	x4 = blk[4];
	x5 = blk[5];
	x6 = blk[6];
	x7 = blk[7];

	if (Bxor) {
		blk = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
		SALSA64_SSE2_XOR_BLOCK(blk);
	}

	/* 2: for i = 0 to 2r - 1 do */
	for (i = 0, half = 0; i < blocksPerChunk; i++, half ^= r) {
		/* 3: X = H(X ^ B_i) */
		blk = (xmmi *)scrypt_block(Bin, i);
		SALSA64_SSE2_XOR_BLOCK(blk);

		if (Bxor) {
			blk = (xmmi *)scrypt_block(Bxor, i);
			SALSA64_SSE2_XOR_BLOCK(blk);
		}

		/* remember the hash input for the feed-forward add */
		in0 = x0; in1 = x1; in2 = x2; in3 = x3;
		in4 = x4; in5 = x5; in6 = x6; in7 = x7;

		/* eight identical rounds (the original unrolled them in pairs) */
		for (pass = 0; pass < 8; pass++) {
			/* x6:x7 ^= rotl(x0:x1 + x2:x3, 32) */
			sumA = _mm_add_epi64(x0, x2);
			sumB = _mm_add_epi64(x1, x3);
			SALSA64_SSE2_XOR_ROT32(x6, sumA);
			SALSA64_SSE2_XOR_ROT32(x7, sumB);

			/* x4:x5 ^= rotl(x6:x7 + x0:x1, 13) */
			sumA = _mm_add_epi64(x6, x0);
			sumB = _mm_add_epi64(x7, x1);
			SALSA64_SSE2_XOR_ROTL(x4, sumA, 13);
			SALSA64_SSE2_XOR_ROTL(x5, sumB, 13);

			/* x2:x3 ^= rotl(x4:x5 + x6:x7, 39) */
			sumA = _mm_add_epi64(x4, x6);
			sumB = _mm_add_epi64(x5, x7);
			SALSA64_SSE2_XOR_ROTL(x2, sumA, 39);
			SALSA64_SSE2_XOR_ROTL(x3, sumB, 39);

			/* x0:x1 ^= rotl(x2:x3 + x4:x5, 32) */
			sumA = _mm_add_epi64(x2, x4);
			sumB = _mm_add_epi64(x3, x5);
			SALSA64_SSE2_XOR_ROT32(x0, sumA);
			SALSA64_SSE2_XOR_ROT32(x1, sumB);

			/* re-arrange for the next round: swap x4/x5, rebuild x2:x3 from x6:x7 and x6:x7 from the old x2:x3 */
			sumA = x4;
			x4 = x5;
			x5 = sumA;
			old2 = x2;
			old3 = x3;
			x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6));
			x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7));
			x6 = _mm_unpackhi_epi64(old2, _mm_unpacklo_epi64(old3, old3));
			x7 = _mm_unpackhi_epi64(old3, _mm_unpacklo_epi64(old2, old2));
		}

		/* feed-forward */
		x0 = _mm_add_epi64(x0, in0);
		x1 = _mm_add_epi64(x1, in1);
		x2 = _mm_add_epi64(x2, in2);
		x3 = _mm_add_epi64(x3, in3);
		x4 = _mm_add_epi64(x4, in4);
		x5 = _mm_add_epi64(x5, in5);
		x6 = _mm_add_epi64(x6, in6);
		x7 = _mm_add_epi64(x7, in7);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		blk = (xmmi *)scrypt_block(Bout, (i / 2) + half);
		blk[0] = x0;
		blk[1] = x1;
		blk[2] = x2;
		blk[3] = x3;
		blk[4] = x4;
		blk[5] = x5;
		blk[6] = x6;
		blk[7] = x7;
	}
}

#undef SALSA64_SSE2_XOR_ROTL
#undef SALSA64_SSE2_XOR_ROT32
#undef SALSA64_SSE2_XOR_BLOCK

#endif
|
||||
|
||||
/* Runs only when one of the SSE2 ChunkMix implementations above was compiled in. */
#if defined(SCRYPT_SALSA64_SSE2)
|
||||
/* Replace whatever mix name an earlier variant set with the SSE2 one. */
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa64/8-SSE2"
|
||||
/*
	Mark that a Salsa64 implementation now exists. The implementation guards test
	this together with SCRYPT_CHOOSE_COMPILETIME to skip further variants.
*/
#undef SCRYPT_SALSA64_INCLUDED
|
||||
#define SCRYPT_SALSA64_INCLUDED
|
||||
#endif
|
||||
|
||||
/* sse3/avx use this as well */
|
||||
#if defined(SCRYPT_SALSA64_INCLUDED)
|
||||
/*
|
||||
Default layout:
|
||||
0 1 2 3
|
||||
4 5 6 7
|
||||
8 9 10 11
|
||||
12 13 14 15
|
||||
|
||||
SSE2 layout:
|
||||
0 5 10 15
|
||||
12 1 6 11
|
||||
8 13 2 7
|
||||
4 9 14 3
|
||||
*/
|
||||
|
||||
|
||||
/*
	Convert `count` consecutive 16-word blocks between the default word order and
	the SSE2 order, in place. The conversion is six pairwise swaps per block, so
	applying it a second time restores the original order.
*/
static void asm_calling_convention
salsa64_core_tangle_sse2(uint64_t *blocks, size_t count) {
	/* word positions that trade places inside every block */
	static const unsigned char swaps[6][2] = {
		{1, 5}, {2, 10}, {3, 15}, {4, 12}, {7, 11}, {9, 13}
	};
	uint64_t *blk;
	uint64_t held;
	size_t pair;

	for (blk = blocks; count != 0; count--, blk += 16) {
		for (pair = 0; pair < 6; pair++) {
			held = blk[swaps[pair][0]];
			blk[swaps[pair][0]] = blk[swaps[pair][1]];
			blk[swaps[pair][1]] = held;
		}
	}
}
|
||||
#endif
|
||||
|
|
@ -1,399 +0,0 @@
|
|||
/* x64 */
|
||||
#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_SSSE3
|
||||
|
||||
/*
	scrypt_ChunkMix_ssse3, x86-64 assembly version (Salsa64/8 block mix, SSSE3).

	The code reads input blocks through rsi, tests rdx for zero before XORing a
	second input, writes results relative to rdi and derives the chunk size from
	ecx. That lines up with the (Bout, Bin, Bxor, r) prototype below.
	NOTE(review): the register assignment is presumably arranged by asm_naked_fn,
	which is not visible here - confirm, in particular for Win64.

	xmm0-xmm7 hold the 128-byte working block, xmm8-xmm11 are scratch.
	r9 is the byte offset of the current input block; r8 alternates between 0 and
	the chunk size and selects the lower or upper half of the output.
	The qword re-arrangement between rounds is done with palignr.
*/
asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_ssse3)
|
||||
/* prologue: save rbp, align rsp down to 64 bytes, reserve 128 bytes of scratch */
a1(push rbp)
|
||||
a2(mov rbp, rsp)
|
||||
a2(and rsp, ~63)
|
||||
a2(sub rsp, 128)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
/* rcx = (2*r) * 128 = chunk size in bytes */
a2(shl rcx,7)
|
||||
/* r9 = offset of the last block; rax -> last block of the rsi input, r9 -> last block of the rdx input */
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
/* sets ZF when rdx is zero; the movdqa loads below leave the flags alone, so the jz still sees it */
a2(and rdx, rdx)
|
||||
a2(movdqa xmm0,[rax+0])
|
||||
a2(movdqa xmm1,[rax+16])
|
||||
a2(movdqa xmm2,[rax+32])
|
||||
a2(movdqa xmm3,[rax+48])
|
||||
a2(movdqa xmm4,[rax+64])
|
||||
a2(movdqa xmm5,[rax+80])
|
||||
a2(movdqa xmm6,[rax+96])
|
||||
a2(movdqa xmm7,[rax+112])
|
||||
/* no second input: skip the XOR of its last block */
aj(jz scrypt_ChunkMix_ssse3_no_xor1)
|
||||
a2(pxor xmm0,[r9+0])
|
||||
a2(pxor xmm1,[r9+16])
|
||||
a2(pxor xmm2,[r9+32])
|
||||
a2(pxor xmm3,[r9+48])
|
||||
a2(pxor xmm4,[r9+64])
|
||||
a2(pxor xmm5,[r9+80])
|
||||
a2(pxor xmm6,[r9+96])
|
||||
a2(pxor xmm7,[r9+112])
|
||||
a1(scrypt_ChunkMix_ssse3_no_xor1:)
|
||||
/* r9 = input byte offset (starts at 0), r8 = output half selector (starts at 0) */
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
/* outer loop: one pass per 128-byte block of the chunk */
a1(scrypt_ChunkMix_ssse3_loop:)
|
||||
/* refresh ZF for the jz below; pxor does not change the flags */
a2(and rdx, rdx)
|
||||
/* state ^= input block at offset r9 */
a2(pxor xmm0,[rsi+r9+0])
|
||||
a2(pxor xmm1,[rsi+r9+16])
|
||||
a2(pxor xmm2,[rsi+r9+32])
|
||||
a2(pxor xmm3,[rsi+r9+48])
|
||||
a2(pxor xmm4,[rsi+r9+64])
|
||||
a2(pxor xmm5,[rsi+r9+80])
|
||||
a2(pxor xmm6,[rsi+r9+96])
|
||||
a2(pxor xmm7,[rsi+r9+112])
|
||||
aj(jz scrypt_ChunkMix_ssse3_no_xor2)
|
||||
/* state ^= matching block of the second input */
a2(pxor xmm0,[rdx+r9+0])
|
||||
a2(pxor xmm1,[rdx+r9+16])
|
||||
a2(pxor xmm2,[rdx+r9+32])
|
||||
a2(pxor xmm3,[rdx+r9+48])
|
||||
a2(pxor xmm4,[rdx+r9+64])
|
||||
a2(pxor xmm5,[rdx+r9+80])
|
||||
a2(pxor xmm6,[rdx+r9+96])
|
||||
a2(pxor xmm7,[rdx+r9+112])
|
||||
a1(scrypt_ChunkMix_ssse3_no_xor2:)
|
||||
/* save the hash input on the stack for the feed-forward add after the rounds */
a2(movdqa [rsp+0],xmm0)
|
||||
a2(movdqa [rsp+16],xmm1)
|
||||
a2(movdqa [rsp+32],xmm2)
|
||||
a2(movdqa [rsp+48],xmm3)
|
||||
a2(movdqa [rsp+64],xmm4)
|
||||
a2(movdqa [rsp+80],xmm5)
|
||||
a2(movdqa [rsp+96],xmm6)
|
||||
a2(movdqa [rsp+112],xmm7)
|
||||
/* rax = rounds left; each pass through the inner loop performs two */
a2(mov rax,8)
|
||||
a1(scrypt_salsa64_ssse3_loop: )
|
||||
/* first round: xmm6:7 ^= rotl(xmm0:1 + xmm2:3, 32); pshufd 0xb1 swaps the 32-bit halves of each qword */
a2(movdqa xmm8, xmm0)
|
||||
a2(movdqa xmm9, xmm1)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm6, xmm8)
|
||||
a2(pxor xmm7, xmm9)
|
||||
/* xmm4:5 ^= rotl(xmm0:1 + xmm6:7, 13), built from >>51 and <<13 */
a2(movdqa xmm10, xmm0)
|
||||
a2(movdqa xmm11, xmm1)
|
||||
a2(paddq xmm10, xmm6)
|
||||
a2(paddq xmm11, xmm7)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 51)
|
||||
a2(psrlq xmm11, 51)
|
||||
a2(psllq xmm8, 13)
|
||||
a2(psllq xmm9, 13)
|
||||
a2(pxor xmm4, xmm10)
|
||||
a2(pxor xmm5, xmm11)
|
||||
a2(pxor xmm4, xmm8)
|
||||
a2(pxor xmm5, xmm9)
|
||||
/* xmm2:3 ^= rotl(xmm6:7 + xmm4:5, 39), built from >>25 and <<39 */
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(paddq xmm10, xmm4)
|
||||
a2(paddq xmm11, xmm5)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 25)
|
||||
a2(psrlq xmm11, 25)
|
||||
a2(psllq xmm8, 39)
|
||||
a2(psllq xmm9, 39)
|
||||
a2(pxor xmm2, xmm10)
|
||||
a2(pxor xmm3, xmm11)
|
||||
a2(pxor xmm2, xmm8)
|
||||
a2(pxor xmm3, xmm9)
|
||||
/* xmm0:1 ^= rotl(xmm4:5 + xmm2:3, 32) */
a2(movdqa xmm8, xmm4)
|
||||
a2(movdqa xmm9, xmm5)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm0, xmm8)
|
||||
a2(pxor xmm1, xmm9)
|
||||
/* re-arrange qwords with palignr: xmm2:3 are rebuilt from the old xmm6:7 and xmm6:7 from the old xmm2:3 */
a2(movdqa xmm10, xmm2)
|
||||
a2(movdqa xmm11, xmm3)
|
||||
a2(movdqa xmm2, xmm6)
|
||||
a2(movdqa xmm3, xmm7)
|
||||
a3(palignr xmm2, xmm7, 8)
|
||||
a3(palignr xmm3, xmm6, 8)
|
||||
a2(movdqa xmm6, xmm11)
|
||||
a2(movdqa xmm7, xmm10)
|
||||
a3(palignr xmm6, xmm10, 8)
|
||||
a3(palignr xmm7, xmm11, 8)
|
||||
/* count two rounds; the flags set here are consumed by the ja at the bottom (only SSE instructions follow) */
a2(sub rax, 2)
|
||||
/* second round: same steps, but xmm5 and xmm4 trade places */
a2(movdqa xmm8, xmm0)
|
||||
a2(movdqa xmm9, xmm1)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm6, xmm8)
|
||||
a2(pxor xmm7, xmm9)
|
||||
a2(movdqa xmm10, xmm0)
|
||||
a2(movdqa xmm11, xmm1)
|
||||
a2(paddq xmm10, xmm6)
|
||||
a2(paddq xmm11, xmm7)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 51)
|
||||
a2(psrlq xmm11, 51)
|
||||
a2(psllq xmm8, 13)
|
||||
a2(psllq xmm9, 13)
|
||||
a2(pxor xmm5, xmm10)
|
||||
a2(pxor xmm4, xmm11)
|
||||
a2(pxor xmm5, xmm8)
|
||||
a2(pxor xmm4, xmm9)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(paddq xmm10, xmm5)
|
||||
a2(paddq xmm11, xmm4)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 25)
|
||||
a2(psrlq xmm11, 25)
|
||||
a2(psllq xmm8, 39)
|
||||
a2(psllq xmm9, 39)
|
||||
a2(pxor xmm2, xmm10)
|
||||
a2(pxor xmm3, xmm11)
|
||||
a2(pxor xmm2, xmm8)
|
||||
a2(pxor xmm3, xmm9)
|
||||
a2(movdqa xmm8, xmm5)
|
||||
a2(movdqa xmm9, xmm4)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm0, xmm8)
|
||||
a2(pxor xmm1, xmm9)
|
||||
/* same qword re-arrangement as after the first round */
a2(movdqa xmm10, xmm2)
|
||||
a2(movdqa xmm11, xmm3)
|
||||
a2(movdqa xmm2, xmm6)
|
||||
a2(movdqa xmm3, xmm7)
|
||||
a3(palignr xmm2, xmm7, 8)
|
||||
a3(palignr xmm3, xmm6, 8)
|
||||
a2(movdqa xmm6, xmm11)
|
||||
a2(movdqa xmm7, xmm10)
|
||||
a3(palignr xmm6, xmm10, 8)
|
||||
a3(palignr xmm7, xmm11, 8)
|
||||
/* loop while the sub rax,2 above left rax above zero */
aj(ja scrypt_salsa64_ssse3_loop)
|
||||
/* feed-forward: add the saved input back in */
a2(paddq xmm0,[rsp+0])
|
||||
a2(paddq xmm1,[rsp+16])
|
||||
a2(paddq xmm2,[rsp+32])
|
||||
a2(paddq xmm3,[rsp+48])
|
||||
a2(paddq xmm4,[rsp+64])
|
||||
a2(paddq xmm5,[rsp+80])
|
||||
a2(paddq xmm6,[rsp+96])
|
||||
a2(paddq xmm7,[rsp+112])
|
||||
/*
	Output address: rax = rdi + (((r8 + r9) & ~0xff) >> 1).
	With r9 = i*128 and r8 = 0 or the chunk size this is block i/2 for even i and
	block r + i/2 for odd i. r8 is toggled and r9 advanced for the next pass.
*/
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
/* end-of-chunk test; the movdqa stores below do not disturb the flags before the jne */
a2(cmp r9,rcx)
|
||||
a2(movdqa [rax+0],xmm0)
|
||||
a2(movdqa [rax+16],xmm1)
|
||||
a2(movdqa [rax+32],xmm2)
|
||||
a2(movdqa [rax+48],xmm3)
|
||||
a2(movdqa [rax+64],xmm4)
|
||||
a2(movdqa [rax+80],xmm5)
|
||||
a2(movdqa [rax+96],xmm6)
|
||||
a2(movdqa [rax+112],xmm7)
|
||||
aj(jne scrypt_ChunkMix_ssse3_loop)
|
||||
/* epilogue: drop the aligned scratch area and restore rbp */
a2(mov rsp, rbp)
|
||||
a1(pop rbp)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_ssse3)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))

#define SCRYPT_SALSA64_SSSE3

/* XOR the eight vectors of the block at p into the working state x0..x7 */
#define SALSA64_SSSE3_XOR_BLOCK(p) \
	do { \
		x0 = _mm_xor_si128(x0, (p)[0]); \
		x1 = _mm_xor_si128(x1, (p)[1]); \
		x2 = _mm_xor_si128(x2, (p)[2]); \
		x3 = _mm_xor_si128(x3, (p)[3]); \
		x4 = _mm_xor_si128(x4, (p)[4]); \
		x5 = _mm_xor_si128(x5, (p)[5]); \
		x6 = _mm_xor_si128(x6, (p)[6]); \
		x7 = _mm_xor_si128(x7, (p)[7]); \
	} while (0)

/* dst ^= rotl64(src, 32); swapping the 32-bit halves of a qword rotates it by 32 */
#define SALSA64_SSSE3_XOR_ROT32(dst, src) \
	dst = _mm_xor_si128(dst, _mm_shuffle_epi32(src, _MM_SHUFFLE(2,3,0,1)))

/* dst ^= rotl64(src, n), applied as the right-shifted part first, then the left-shifted part */
#define SALSA64_SSSE3_XOR_ROTL(dst, src, n) \
	do { \
		dst = _mm_xor_si128(dst, _mm_srli_epi64(src, 64-(n))); \
		dst = _mm_xor_si128(dst, _mm_slli_epi64(src, (n))); \
	} while (0)

/*
	One Salsa64 round on x0..x7. `lo` and `hi` name the two middle vectors:
	(x4, x5) for the first round of a pair and (x5, x4) for the second, which
	stands in for physically swapping them. The round ends by re-arranging the
	qwords of x2:x3 and x6:x7 with _mm_alignr_epi8.
*/
#define SALSA64_SSSE3_ROUND(lo, hi) \
	do { \
		sumA = _mm_add_epi64(x0, x2); \
		sumB = _mm_add_epi64(x1, x3); \
		SALSA64_SSSE3_XOR_ROT32(x6, sumA); \
		SALSA64_SSSE3_XOR_ROT32(x7, sumB); \
		\
		sumA = _mm_add_epi64(x6, x0); \
		sumB = _mm_add_epi64(x7, x1); \
		SALSA64_SSSE3_XOR_ROTL(lo, sumA, 13); \
		SALSA64_SSSE3_XOR_ROTL(hi, sumB, 13); \
		\
		sumA = _mm_add_epi64(lo, x6); \
		sumB = _mm_add_epi64(hi, x7); \
		SALSA64_SSSE3_XOR_ROTL(x2, sumA, 39); \
		SALSA64_SSSE3_XOR_ROTL(x3, sumB, 39); \
		\
		sumA = _mm_add_epi64(x2, lo); \
		sumB = _mm_add_epi64(x3, hi); \
		SALSA64_SSSE3_XOR_ROT32(x0, sumA); \
		SALSA64_SSSE3_XOR_ROT32(x1, sumB); \
		\
		old2 = x2; \
		old3 = x3; \
		x2 = _mm_alignr_epi8(x6, x7, 8); \
		x3 = _mm_alignr_epi8(x7, x6, 8); \
		x6 = _mm_alignr_epi8(old3, old2, 8); \
		x7 = _mm_alignr_epi8(old2, old3, 8); \
	} while (0)

/*
	scrypt ChunkMix with Salsa64/8 as the block hash, SSSE3 intrinsics.

	Bout  destination chunk; receives 2*r blocks
	Bin   source chunk of 2*r blocks
	Bxor  optional second source chunk XORed into Bin block by block; may be NULL
	r     block-count parameter; the chunk holds 2*r blocks

	A block is eight xmmi vectors. Even-numbered results go to Bout block i/2,
	odd-numbered results to Bout block r + i/2.
*/
static void asm_calling_convention
scrypt_ChunkMix_ssse3(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	const uint32_t blocksPerChunk = r * 2;
	uint32_t i, half;
	xmmi *blk;
	xmmi x0,x1,x2,x3,x4,x5,x6,x7;
	xmmi in0,in1,in2,in3,in4,in5,in6,in7;
	xmmi sumA,sumB,old2,old3;
	size_t pairsLeft;

	/* 1: X = B_{2r - 1} */
	blk = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
	x0 = blk[0];
	x1 = blk[1];
	x2 = blk[2];
	x3 = blk[3];
	x4 = blk[4];
	x5 = blk[5];
	x6 = blk[6];
	x7 = blk[7];

	if (Bxor) {
		blk = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
		SALSA64_SSSE3_XOR_BLOCK(blk);
	}

	/* 2: for i = 0 to 2r - 1 do */
	for (i = 0, half = 0; i < blocksPerChunk; i++, half ^= r) {
		/* 3: X = H(X ^ B_i) */
		blk = (xmmi *)scrypt_block(Bin, i);
		SALSA64_SSSE3_XOR_BLOCK(blk);

		if (Bxor) {
			blk = (xmmi *)scrypt_block(Bxor, i);
			SALSA64_SSSE3_XOR_BLOCK(blk);
		}

		/* remember the hash input for the feed-forward add */
		in0 = x0; in1 = x1; in2 = x2; in3 = x3;
		in4 = x4; in5 = x5; in6 = x6; in7 = x7;

		/* 8 rounds, taken as four pairs */
		for (pairsLeft = 4; pairsLeft != 0; pairsLeft--) {
			SALSA64_SSSE3_ROUND(x4, x5);
			SALSA64_SSSE3_ROUND(x5, x4);
		}

		/* feed-forward */
		x0 = _mm_add_epi64(x0, in0);
		x1 = _mm_add_epi64(x1, in1);
		x2 = _mm_add_epi64(x2, in2);
		x3 = _mm_add_epi64(x3, in3);
		x4 = _mm_add_epi64(x4, in4);
		x5 = _mm_add_epi64(x5, in5);
		x6 = _mm_add_epi64(x6, in6);
		x7 = _mm_add_epi64(x7, in7);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		blk = (xmmi *)scrypt_block(Bout, (i / 2) + half);
		blk[0] = x0;
		blk[1] = x1;
		blk[2] = x2;
		blk[3] = x3;
		blk[4] = x4;
		blk[5] = x5;
		blk[6] = x6;
		blk[7] = x7;
	}
}

#undef SALSA64_SSSE3_ROUND
#undef SALSA64_SSSE3_XOR_ROTL
#undef SALSA64_SSSE3_XOR_ROT32
#undef SALSA64_SSSE3_XOR_BLOCK

#endif
|
||||
|
||||
/* Runs only when one of the SSSE3 ChunkMix implementations above was compiled in. */
#if defined(SCRYPT_SALSA64_SSSE3)
|
||||
/* uses salsa64_core_tangle_sse2 */
|
||||
|
||||
/* Replace whatever mix name an earlier variant set with the SSSE3 one. */
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa64/8-SSSE3"
|
||||
/*
	Mark that a Salsa64 implementation now exists. The implementation guards test
	this together with SCRYPT_CHOOSE_COMPILETIME to skip further variants.
*/
#undef SCRYPT_SALSA64_INCLUDED
|
||||
#define SCRYPT_SALSA64_INCLUDED
|
||||
#endif
|
||||
|
|
@ -1,335 +0,0 @@
|
|||
/* x64 */
#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_SALSA64_XOP

/*
 * scrypt_ChunkMix_xop -- hand-written x64 version of the Salsa64/8 chunk mix.
 *
 * Register usage as it appears in the body:
 *   rdi  base added to every output address (Bout)
 *   rsi  base of the blocks XORed in on every iteration (Bin)
 *   rdx  optional second input (Bxor); tested for zero before each use
 *   ecx  r; widened and scaled to r * 2 * 128 = size of one chunk in bytes
 *   r9   byte offset of the current 128-byte block
 *   r8   toggles between 0 and the chunk size to alternate output halves
 *   xmm0-xmm7   the 128-byte working block X
 *   xmm8-xmm11  scratch
 *   [rsp..rsp+127]  64-byte aligned copy of X taken before the rounds
 * NOTE(review): how arguments reach rdi/rsi/rdx/ecx on Win64 depends on the
 * asm_naked_fn macros, which are defined elsewhere -- confirm there.
 */
asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_xop)
	/* frame: keep the old rsp in rbp, carve out 128 aligned scratch bytes */
	a1(push rbp)
	a2(mov rbp, rsp)
	a2(and rsp, ~63)
	a2(sub rsp, 128)

	/* rcx = r * 2 * 128 (chunk size in bytes) */
	a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
	a2(shl rcx,7)

	/* X = last block of Bin; r9 = address of the last block of Bxor */
	a2(lea r9,[rcx-128])
	a2(lea rax,[rsi+r9])
	a2(lea r9,[rdx+r9])
	a2(and rdx, rdx) /* sets ZF when Bxor is NULL; the moves below leave flags alone */
	a2(vmovdqa xmm0,[rax+0])
	a2(vmovdqa xmm1,[rax+16])
	a2(vmovdqa xmm2,[rax+32])
	a2(vmovdqa xmm3,[rax+48])
	a2(vmovdqa xmm4,[rax+64])
	a2(vmovdqa xmm5,[rax+80])
	a2(vmovdqa xmm6,[rax+96])
	a2(vmovdqa xmm7,[rax+112])
	aj(jz scrypt_ChunkMix_xop_no_xor1)
	/* X ^= last block of Bxor */
	a3(vpxor xmm0,xmm0,[r9+0])
	a3(vpxor xmm1,xmm1,[r9+16])
	a3(vpxor xmm2,xmm2,[r9+32])
	a3(vpxor xmm3,xmm3,[r9+48])
	a3(vpxor xmm4,xmm4,[r9+64])
	a3(vpxor xmm5,xmm5,[r9+80])
	a3(vpxor xmm6,xmm6,[r9+96])
	a3(vpxor xmm7,xmm7,[r9+112])
	a1(scrypt_ChunkMix_xop_no_xor1:)
	a2(xor r9,r9) /* block offset = 0 */
	a2(xor r8,r8) /* output-half toggle = 0 */

	/* one iteration per 128-byte block, until r9 reaches the chunk size */
	a1(scrypt_ChunkMix_xop_loop:)
		a2(and rdx, rdx) /* ZF = (Bxor == NULL), consumed by the jz below */
		/* X ^= Bin block at offset r9 */
		a3(vpxor xmm0,xmm0,[rsi+r9+0])
		a3(vpxor xmm1,xmm1,[rsi+r9+16])
		a3(vpxor xmm2,xmm2,[rsi+r9+32])
		a3(vpxor xmm3,xmm3,[rsi+r9+48])
		a3(vpxor xmm4,xmm4,[rsi+r9+64])
		a3(vpxor xmm5,xmm5,[rsi+r9+80])
		a3(vpxor xmm6,xmm6,[rsi+r9+96])
		a3(vpxor xmm7,xmm7,[rsi+r9+112])
		aj(jz scrypt_ChunkMix_xop_no_xor2)
		/* X ^= Bxor block at offset r9 */
		a3(vpxor xmm0,xmm0,[rdx+r9+0])
		a3(vpxor xmm1,xmm1,[rdx+r9+16])
		a3(vpxor xmm2,xmm2,[rdx+r9+32])
		a3(vpxor xmm3,xmm3,[rdx+r9+48])
		a3(vpxor xmm4,xmm4,[rdx+r9+64])
		a3(vpxor xmm5,xmm5,[rdx+r9+80])
		a3(vpxor xmm6,xmm6,[rdx+r9+96])
		a3(vpxor xmm7,xmm7,[rdx+r9+112])
		a1(scrypt_ChunkMix_xop_no_xor2:)
		/* save X so it can be added back after the rounds */
		a2(vmovdqa [rsp+0],xmm0)
		a2(vmovdqa [rsp+16],xmm1)
		a2(vmovdqa [rsp+32],xmm2)
		a2(vmovdqa [rsp+48],xmm3)
		a2(vmovdqa [rsp+64],xmm4)
		a2(vmovdqa [rsp+80],xmm5)
		a2(vmovdqa [rsp+96],xmm6)
		a2(vmovdqa [rsp+112],xmm7)
		/* rax counts rounds: 8, decremented by 2 per pass => 4 passes */
		a2(mov rax,8)
		a1(scrypt_salsa64_xop_loop: )
			/* first half of the pass; vpshufd 0xb1 swaps the 32-bit halves of each qword */
			a3(vpaddq xmm8, xmm0, xmm2)
			a3(vpaddq xmm9, xmm1, xmm3)
			a3(vpshufd xmm8, xmm8, 0xb1)
			a3(vpshufd xmm9, xmm9, 0xb1)
			a3(vpxor xmm6, xmm6, xmm8)
			a3(vpxor xmm7, xmm7, xmm9)
			a3(vpaddq xmm10, xmm0, xmm6)
			a3(vpaddq xmm11, xmm1, xmm7)
			a3(vprotq xmm10, xmm10, 13)
			a3(vprotq xmm11, xmm11, 13)
			a3(vpxor xmm4, xmm4, xmm10)
			a3(vpxor xmm5, xmm5, xmm11)
			a3(vpaddq xmm8, xmm6, xmm4)
			a3(vpaddq xmm9, xmm7, xmm5)
			a3(vprotq xmm8, xmm8, 39)
			a3(vprotq xmm9, xmm9, 39)
			a3(vpxor xmm2, xmm2, xmm8)
			a3(vpxor xmm3, xmm3, xmm9)
			a3(vpaddq xmm10, xmm4, xmm2)
			a3(vpaddq xmm11, xmm5, xmm3)
			a3(vpshufd xmm10, xmm10, 0xb1)
			a3(vpshufd xmm11, xmm11, 0xb1)
			a3(vpxor xmm0, xmm0, xmm10)
			a3(vpxor xmm1, xmm1, xmm11)
			/* re-pair the qwords of xmm2/3 and xmm6/7 for the second half */
			a2(vmovdqa xmm8, xmm2)
			a2(vmovdqa xmm9, xmm3)
			a4(vpalignr xmm2, xmm6, xmm7, 8)
			a4(vpalignr xmm3, xmm7, xmm6, 8)
			a4(vpalignr xmm6, xmm9, xmm8, 8)
			a4(vpalignr xmm7, xmm8, xmm9, 8)
			/* second half; note xmm4 and xmm5 trade places relative to the first half */
			a3(vpaddq xmm10, xmm0, xmm2)
			a3(vpaddq xmm11, xmm1, xmm3)
			a3(vpshufd xmm10, xmm10, 0xb1)
			a3(vpshufd xmm11, xmm11, 0xb1)
			a3(vpxor xmm6, xmm6, xmm10)
			a3(vpxor xmm7, xmm7, xmm11)
			a3(vpaddq xmm8, xmm0, xmm6)
			a3(vpaddq xmm9, xmm1, xmm7)
			a3(vprotq xmm8, xmm8, 13)
			a3(vprotq xmm9, xmm9, 13)
			a3(vpxor xmm5, xmm5, xmm8)
			a3(vpxor xmm4, xmm4, xmm9)
			a3(vpaddq xmm10, xmm6, xmm5)
			a3(vpaddq xmm11, xmm7, xmm4)
			a3(vprotq xmm10, xmm10, 39)
			a3(vprotq xmm11, xmm11, 39)
			a3(vpxor xmm2, xmm2, xmm10)
			a3(vpxor xmm3, xmm3, xmm11)
			a3(vpaddq xmm8, xmm5, xmm2)
			a3(vpaddq xmm9, xmm4, xmm3)
			a3(vpshufd xmm8, xmm8, 0xb1)
			a3(vpshufd xmm9, xmm9, 0xb1)
			a3(vpxor xmm0, xmm0, xmm8)
			a3(vpxor xmm1, xmm1, xmm9)
			/* same re-pairing as after the first half */
			a2(vmovdqa xmm10, xmm2)
			a2(vmovdqa xmm11, xmm3)
			a4(vpalignr xmm2, xmm6, xmm7, 8)
			a4(vpalignr xmm3, xmm7, xmm6, 8)
			a4(vpalignr xmm6, xmm11, xmm10, 8)
			a4(vpalignr xmm7, xmm10, xmm11, 8)
			a2(sub rax, 2)
			aj(ja scrypt_salsa64_xop_loop)
		/* X += saved copy */
		a3(vpaddq xmm0,xmm0,[rsp+0])
		a3(vpaddq xmm1,xmm1,[rsp+16])
		a3(vpaddq xmm2,xmm2,[rsp+32])
		a3(vpaddq xmm3,xmm3,[rsp+48])
		a3(vpaddq xmm4,xmm4,[rsp+64])
		a3(vpaddq xmm5,xmm5,[rsp+80])
		a3(vpaddq xmm6,xmm6,[rsp+96])
		a3(vpaddq xmm7,xmm7,[rsp+112])
		/*
		 * Output address: ((r8 + r9) & ~0xff) / 2 + rdi, i.e. block index
		 * (i / 2) plus r on every other iteration -- even blocks fill the
		 * first half of Bout, odd blocks the second half.
		 */
		a2(lea rax,[r8+r9])
		a2(xor r8,rcx)
		a2(and rax,~0xff)
		a2(add r9,128)
		a2(shr rax,1)
		a2(add rax, rdi)
		a2(cmp r9,rcx) /* flags for the jne below; the stores leave them intact */
		a2(vmovdqa [rax+0],xmm0)
		a2(vmovdqa [rax+16],xmm1)
		a2(vmovdqa [rax+32],xmm2)
		a2(vmovdqa [rax+48],xmm3)
		a2(vmovdqa [rax+64],xmm4)
		a2(vmovdqa [rax+80],xmm5)
		a2(vmovdqa [rax+96],xmm6)
		a2(vmovdqa [rax+112],xmm7)
		aj(jne scrypt_ChunkMix_xop_loop)
	/* tear down the frame */
	a2(mov rsp, rbp)
	a1(pop rbp)
	a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_xop)

#endif
|
||||
|
||||
|
||||
/* intrinsic */
#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))

#define SCRYPT_SALSA64_XOP

/*
 * scrypt_ChunkMix_xop -- Salsa64/8 chunk mix written with XOP intrinsics.
 *
 * Bout  receives the mixed chunk (2 * r blocks)
 * Bin   input chunk (2 * r blocks)
 * Bxor  optional second chunk XORed into the input; may be NULL
 * r     number of block pairs in a chunk
 *
 * Each block is handled as eight xmmi values (x0..x7). For every input
 * block the state is XORed with that block (and the matching Bxor block,
 * when given), run through four double-rounds, added to its pre-round copy
 * and stored. Blocks with even index land in the first half of Bout, odd
 * ones in the second half.
 */
static void asm_calling_convention
scrypt_ChunkMix_xop(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
	uint32_t i, blocksPerChunk = r * 2, half = 0;
	xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1;
	size_t rounds;

	/* 1: X = B_{2r - 1} */
	xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
	x0 = xmmp[0];
	x1 = xmmp[1];
	x2 = xmmp[2];
	x3 = xmmp[3];
	x4 = xmmp[4];
	x5 = xmmp[5];
	x6 = xmmp[6];
	x7 = xmmp[7];

	if (Bxor) {
		xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
		x0 = _mm_xor_si128(x0, xmmp[0]);
		x1 = _mm_xor_si128(x1, xmmp[1]);
		x2 = _mm_xor_si128(x2, xmmp[2]);
		x3 = _mm_xor_si128(x3, xmmp[3]);
		x4 = _mm_xor_si128(x4, xmmp[4]);
		x5 = _mm_xor_si128(x5, xmmp[5]);
		x6 = _mm_xor_si128(x6, xmmp[6]);
		x7 = _mm_xor_si128(x7, xmmp[7]);
	}

	/* 2: for i = 0 to 2r - 1 do; half flips between 0 and r each pass */
	for (i = 0; i < blocksPerChunk; i++, half ^= r) {
		/* 3: X = H(X ^ B_i) */
		xmmp = (xmmi *)scrypt_block(Bin, i);
		x0 = _mm_xor_si128(x0, xmmp[0]);
		x1 = _mm_xor_si128(x1, xmmp[1]);
		x2 = _mm_xor_si128(x2, xmmp[2]);
		x3 = _mm_xor_si128(x3, xmmp[3]);
		x4 = _mm_xor_si128(x4, xmmp[4]);
		x5 = _mm_xor_si128(x5, xmmp[5]);
		x6 = _mm_xor_si128(x6, xmmp[6]);
		x7 = _mm_xor_si128(x7, xmmp[7]);

		if (Bxor) {
			xmmp = (xmmi *)scrypt_block(Bxor, i);
			x0 = _mm_xor_si128(x0, xmmp[0]);
			x1 = _mm_xor_si128(x1, xmmp[1]);
			x2 = _mm_xor_si128(x2, xmmp[2]);
			x3 = _mm_xor_si128(x3, xmmp[3]);
			x4 = _mm_xor_si128(x4, xmmp[4]);
			x5 = _mm_xor_si128(x5, xmmp[5]);
			x6 = _mm_xor_si128(x6, xmmp[6]);
			x7 = _mm_xor_si128(x7, xmmp[7]);
		}

		/* keep the pre-round state for the feed-forward addition below */
		t0 = x0;
		t1 = x1;
		t2 = x2;
		t3 = x3;
		t4 = x4;
		t5 = x5;
		t6 = x6;
		t7 = x7;

		/* 8 rounds, two per pass */
		for (rounds = 8; rounds; rounds -= 2) {
			/* _MM_SHUFFLE(2,3,0,1) swaps the 32-bit halves of each 64-bit lane */
			z0 = _mm_add_epi64(x0, x2);
			z1 = _mm_add_epi64(x1, x3);
			z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
			z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
			x6 = _mm_xor_si128(x6, z0);
			x7 = _mm_xor_si128(x7, z1);

			z0 = _mm_add_epi64(x6, x0);
			z1 = _mm_add_epi64(x7, x1);
			z0 = _mm_roti_epi64(z0, 13);
			z1 = _mm_roti_epi64(z1, 13);
			x4 = _mm_xor_si128(x4, z0);
			x5 = _mm_xor_si128(x5, z1);

			z0 = _mm_add_epi64(x4, x6);
			z1 = _mm_add_epi64(x5, x7);
			z0 = _mm_roti_epi64(z0, 39);
			z1 = _mm_roti_epi64(z1, 39);
			x2 = _mm_xor_si128(x2, z0);
			x3 = _mm_xor_si128(x3, z1);

			z0 = _mm_add_epi64(x2, x4);
			z1 = _mm_add_epi64(x3, x5);
			z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
			z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
			x0 = _mm_xor_si128(x0, z0);
			x1 = _mm_xor_si128(x1, z1);

			/* re-pair the 64-bit lanes of x2/x3 and x6/x7 for the next round */
			z0 = x2;
			z1 = x3;
			x2 = _mm_alignr_epi8(x6, x7, 8);
			x3 = _mm_alignr_epi8(x7, x6, 8);
			x6 = _mm_alignr_epi8(z1, z0, 8);
			x7 = _mm_alignr_epi8(z0, z1, 8);

			/* second round of the pass; x4 and x5 trade places below */
			z0 = _mm_add_epi64(x0, x2);
			z1 = _mm_add_epi64(x1, x3);
			z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
			z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
			x6 = _mm_xor_si128(x6, z0);
			x7 = _mm_xor_si128(x7, z1);

			z0 = _mm_add_epi64(x6, x0);
			z1 = _mm_add_epi64(x7, x1);
			z0 = _mm_roti_epi64(z0, 13);
			z1 = _mm_roti_epi64(z1, 13);
			x5 = _mm_xor_si128(x5, z0);
			x4 = _mm_xor_si128(x4, z1);

			z0 = _mm_add_epi64(x5, x6);
			z1 = _mm_add_epi64(x4, x7);
			z0 = _mm_roti_epi64(z0, 39);
			z1 = _mm_roti_epi64(z1, 39);
			x2 = _mm_xor_si128(x2, z0);
			x3 = _mm_xor_si128(x3, z1);

			z0 = _mm_add_epi64(x2, x5);
			z1 = _mm_add_epi64(x3, x4);
			z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
			z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
			x0 = _mm_xor_si128(x0, z0);
			x1 = _mm_xor_si128(x1, z1);

			z0 = x2;
			z1 = x3;
			x2 = _mm_alignr_epi8(x6, x7, 8);
			x3 = _mm_alignr_epi8(x7, x6, 8);
			x6 = _mm_alignr_epi8(z1, z0, 8);
			x7 = _mm_alignr_epi8(z0, z1, 8);
		}

		/* feed-forward: X += pre-round state */
		x0 = _mm_add_epi64(x0, t0);
		x1 = _mm_add_epi64(x1, t1);
		x2 = _mm_add_epi64(x2, t2);
		x3 = _mm_add_epi64(x3, t3);
		x4 = _mm_add_epi64(x4, t4);
		x5 = _mm_add_epi64(x5, t5);
		x6 = _mm_add_epi64(x6, t6);
		x7 = _mm_add_epi64(x7, t7);

		/* 4: Y_i = X */
		/* 6: B'[0..r-1] = Y_even */
		/* 6: B'[r..2r-1] = Y_odd */
		xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
		xmmp[0] = x0;
		xmmp[1] = x1;
		xmmp[2] = x2;
		xmmp[3] = x3;
		xmmp[4] = x4;
		xmmp[5] = x5;
		xmmp[6] = x6;
		xmmp[7] = x7;
	}
}

#endif
|
||||
|
||||
/*
 * Registration of the XOP Salsa64/8 mixer.
 *
 * When SCRYPT_SALSA64_XOP is defined, SCRYPT_MIX is redefined to the name
 * string of this variant and SCRYPT_SALSA64_INCLUDED is (re)defined; the
 * guards of the other Salsa64 variants test that marker together with
 * SCRYPT_CHOOSE_COMPILETIME.
 */
#ifdef SCRYPT_SALSA64_XOP
	/* uses salsa64_core_tangle_sse2 */

	#undef SCRYPT_SALSA64_INCLUDED
	#define SCRYPT_SALSA64_INCLUDED

	#undef SCRYPT_MIX
	#define SCRYPT_MIX "Salsa64/8-XOP"
#endif
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
/* Portable reference mixer: compiled unless a Salsa64 variant was already chosen at compile time. */
#if !(defined(SCRYPT_CHOOSE_COMPILETIME) && defined(SCRYPT_SALSA64_INCLUDED))

#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa64/8 Ref"

#undef SCRYPT_SALSA64_INCLUDED
#define SCRYPT_SALSA64_INCLUDED
#define SCRYPT_SALSA64_BASIC

/*
 * Salsa64/8 core on sixteen 64-bit words.
 *
 * Works on a private copy of `state`, applies eight rounds (four passes of
 * two rounds each) and then adds the mixed copy back into `state` word by
 * word, so `state` is updated in place.
 */
static void
salsa64_core_basic(uint64_t state[16]) {
	uint64_t x[16], sum;
	size_t k, pass;

	for (k = 0; k < 16; k++)
		x[k] = state[k];

	/* one quarter-round on the four words selected by a, b, c, d */
	#define SALSA64_QUARTER(a,b,c,d) do { \
		sum = x[a]+x[d]; sum = ROTL64(sum, 32); x[b] ^= sum; \
		sum = x[b]+x[a]; sum = ROTL64(sum, 13); x[c] ^= sum; \
		sum = x[c]+x[b]; sum = ROTL64(sum, 39); x[d] ^= sum; \
		sum = x[d]+x[c]; sum = ROTL64(sum, 32); x[a] ^= sum; \
	} while (0)

	/* 8 rounds total, two per pass */
	for (pass = 8 / 2; pass != 0; pass--) {
		/* first round of the pass */
		SALSA64_QUARTER( 0, 4, 8,12);
		SALSA64_QUARTER( 5, 9,13, 1);
		SALSA64_QUARTER(10,14, 2, 6);
		SALSA64_QUARTER(15, 3, 7,11);
		/* second round of the pass */
		SALSA64_QUARTER( 0, 1, 2, 3);
		SALSA64_QUARTER( 5, 6, 7, 4);
		SALSA64_QUARTER(10,11, 8, 9);
		SALSA64_QUARTER(15,12,13,14);
	}

	#undef SALSA64_QUARTER

	/* feed-forward into the caller's state */
	for (k = 0; k < 16; k++)
		state[k] += x[k];
}

#endif
|
||||
|
||||
112
vendor/scrypt-jane/code/scrypt-jane-pbkdf2.h
vendored
112
vendor/scrypt-jane/code/scrypt-jane-pbkdf2.h
vendored
|
|
@ -1,112 +0,0 @@
|
|||
typedef struct scrypt_hmac_state_t {
|
||||
scrypt_hash_state inner, outer;
|
||||
} scrypt_hmac_state;
|
||||
|
||||
|
||||
static void
|
||||
scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) {
|
||||
scrypt_hash_state st;
|
||||
scrypt_hash_init(&st);
|
||||
scrypt_hash_update(&st, m, mlen);
|
||||
scrypt_hash_finish(&st, hash);
|
||||
}
|
||||
|
||||
/* hmac */
|
||||
static void
|
||||
scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) {
|
||||
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
|
||||
size_t i;
|
||||
|
||||
scrypt_hash_init(&st->inner);
|
||||
scrypt_hash_init(&st->outer);
|
||||
|
||||
if (keylen <= SCRYPT_HASH_BLOCK_SIZE) {
|
||||
/* use the key directly if it's <= blocksize bytes */
|
||||
memcpy(pad, key, keylen);
|
||||
} else {
|
||||
/* if it's > blocksize bytes, hash it */
|
||||
scrypt_hash(pad, key, keylen);
|
||||
}
|
||||
|
||||
/* inner = (key ^ 0x36) */
|
||||
/* h(inner || ...) */
|
||||
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
|
||||
pad[i] ^= 0x36;
|
||||
scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE);
|
||||
|
||||
/* outer = (key ^ 0x5c) */
|
||||
/* h(outer || ...) */
|
||||
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
|
||||
pad[i] ^= (0x5c ^ 0x36);
|
||||
scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);
|
||||
|
||||
scrypt_ensure_zero(pad, sizeof(pad));
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) {
|
||||
/* h(inner || m...) */
|
||||
scrypt_hash_update(&st->inner, m, mlen);
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) {
|
||||
/* h(inner || m) */
|
||||
scrypt_hash_digest innerhash;
|
||||
scrypt_hash_finish(&st->inner, innerhash);
|
||||
|
||||
/* h(outer || h(inner || m)) */
|
||||
scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash));
|
||||
scrypt_hash_finish(&st->outer, mac);
|
||||
|
||||
scrypt_ensure_zero(st, sizeof(*st));
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_pbkdf2(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *out, size_t bytes) {
|
||||
scrypt_hmac_state hmac_pw, hmac_pw_salt, work;
|
||||
scrypt_hash_digest ti, u;
|
||||
uint8_t be[4];
|
||||
uint32_t i, j, blocks;
|
||||
uint64_t c;
|
||||
|
||||
/* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */
|
||||
|
||||
/* hmac(password, ...) */
|
||||
scrypt_hmac_init(&hmac_pw, password, password_len);
|
||||
|
||||
/* hmac(password, salt...) */
|
||||
hmac_pw_salt = hmac_pw;
|
||||
scrypt_hmac_update(&hmac_pw_salt, salt, salt_len);
|
||||
|
||||
blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE;
|
||||
for (i = 1; i <= blocks; i++) {
|
||||
/* U1 = hmac(password, salt || be(i)) */
|
||||
U32TO8_BE(be, i);
|
||||
work = hmac_pw_salt;
|
||||
scrypt_hmac_update(&work, be, 4);
|
||||
scrypt_hmac_finish(&work, ti);
|
||||
memcpy(u, ti, sizeof(u));
|
||||
|
||||
/* T[i] = U1 ^ U2 ^ U3... */
|
||||
for (c = 0; c < N - 1; c++) {
|
||||
/* UX = hmac(password, U{X-1}) */
|
||||
work = hmac_pw;
|
||||
scrypt_hmac_update(&work, u, SCRYPT_HASH_DIGEST_SIZE);
|
||||
scrypt_hmac_finish(&work, u);
|
||||
|
||||
/* T[i] ^= UX */
|
||||
for (j = 0; j < sizeof(u); j++)
|
||||
ti[j] ^= u[j];
|
||||
}
|
||||
|
||||
memcpy(out, ti, (bytes > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : bytes);
|
||||
out += SCRYPT_HASH_DIGEST_SIZE;
|
||||
bytes -= SCRYPT_HASH_DIGEST_SIZE;
|
||||
}
|
||||
|
||||
scrypt_ensure_zero(ti, sizeof(ti));
|
||||
scrypt_ensure_zero(u, sizeof(u));
|
||||
scrypt_ensure_zero(&hmac_pw, sizeof(hmac_pw));
|
||||
scrypt_ensure_zero(&hmac_pw_salt, sizeof(hmac_pw_salt));
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue