#include "stdafx.h" #include #include "EdgeZLib.h" #include "edge/zlib/edgezlib_ppu.h" static CellSpurs* s_pSpurs = NULL; //Set this to 5 if you want deflate/inflate to run in parallel on 5 SPUs. const uint32_t kNumDeflateTasks = 1; const uint32_t kMaxNumDeflateQueueEntries = 64; static CellSpursEventFlag s_eventFlagDeflate; //Cannot be on stack static CellSpursTaskset s_taskSetDeflate; //Cannot be on stack static EdgeZlibDeflateQHandle s_deflateQueue; static void* s_pDeflateQueueBuffer = NULL; static void* s_pDeflateTaskContext[kNumDeflateTasks]; static uint32_t s_numElementsToCompress; //Cannot be on stack const uint32_t kNumInflateTasks = 1; const uint32_t kMaxNumInflateQueueEntries = 64; static CellSpursEventFlag s_eventFlagInflate; //Cannot be on stack static CellSpursTaskset s_taskSetInflate; //Cannot be on stack static EdgeZlibInflateQHandle s_inflateQueue; static void* s_pInflateQueueBuffer = NULL; static void* s_pInflateTaskContext[kNumInflateTasks]; static uint32_t s_numElementsToDecompress; //Cannot be on stack static CRITICAL_SECTION s_critSect; static const bool sc_verbose = false; #define printf_verbose(...) { if(sc_verbose){ printf(__VA_ARGS__);} } void EdgeZLib::Init(CellSpurs* pSpurs) { s_pSpurs = pSpurs; InitializeCriticalSection(&s_critSect); ////////////////////////////////////////////////////////////////////////// // // Initialize Deflate Queue. // This will hold the queue of work that the Deflate Task(s) on SPU will // pull work from. // ////////////////////////////////////////////////////////////////////////// { // register taskSet CellSpursTasksetAttribute taskSetAttribute; uint8_t prios[8] = {15, 15, 15, 15, 15, 15, 0, 0}; int ret = cellSpursTasksetAttributeInitialize( &taskSetAttribute, 0, prios, 8 ); PS3_ASSERT_CELL_ERROR(ret); ret = cellSpursTasksetAttributeSetName(&taskSetAttribute, "edgeZlibDeflateTaskSet"); PS3_ASSERT_CELL_ERROR(ret); ret = cellSpursCreateTasksetWithAttribute( pSpurs, &s_taskSetDeflate, &taskSetAttribute ); PS3_ASSERT_CELL_ERROR(ret); ret = cellSpursEventFlagInitializeIWL( pSpurs, &s_eventFlagDeflate, CELL_SPURS_EVENT_FLAG_CLEAR_AUTO, CELL_SPURS_EVENT_FLAG_SPU2PPU ); PS3_ASSERT_CELL_ERROR(ret); ret = cellSpursEventFlagAttachLv2EventQueue( &s_eventFlagDeflate ); PS3_ASSERT_CELL_ERROR(ret); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Event flag created\n" ); #endif uint32_t deflateQueueBuffSize = edgeZlibGetDeflateQueueSize( kMaxNumDeflateQueueEntries ); s_pDeflateQueueBuffer = memalign( EDGE_ZLIB_DEFLATE_QUEUE_ALIGN, deflateQueueBuffSize ); s_deflateQueue = edgeZlibCreateDeflateQueue( s_pSpurs, kMaxNumDeflateQueueEntries, s_pDeflateQueueBuffer, deflateQueueBuffSize ); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Deflate Queue created\n" ); #endif ////////////////////////////////////////////////////////////////////////// // // Build Deflate Tasks. // We want the compression to be able to run in parallel on multiple // SPUs so we create as many tasks as SPUs that we want it to run // on (kNumDeflateTasks). // ////////////////////////////////////////////////////////////////////////// s_pDeflateTaskContext[kNumDeflateTasks]; for( uint32_t taskNum = 0 ; taskNum < kNumDeflateTasks ; taskNum++ ) { uint32_t contextSaveSize = edgeZlibGetDeflateTaskContextSaveSize(); s_pDeflateTaskContext[taskNum] = memalign( CELL_SPURS_TASK_CONTEXT_ALIGN, contextSaveSize ); edgeZlibCreateDeflateTask( &s_taskSetDeflate, s_pDeflateTaskContext[taskNum], s_deflateQueue ); } #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: %d Deflate Task(s) started\n", kNumDeflateTasks ); #endif } { ////////////////////////////////////////////////////////////////////////// // // Initialize taskset and event flag // ////////////////////////////////////////////////////////////////////////// // register taskSet CellSpursTasksetAttribute taskSetAttribute; uint8_t prios[8] = {15, 15, 15, 15, 15, 15, 0, 0}; int ret = cellSpursTasksetAttributeInitialize( &taskSetAttribute, 0, prios, 8 ); PS3_ASSERT_CELL_ERROR(ret); ret = cellSpursTasksetAttributeSetName(&taskSetAttribute, "edgeZlibInflateTaskSet"); PS3_ASSERT_CELL_ERROR(ret); ret = cellSpursCreateTasksetWithAttribute( s_pSpurs, &s_taskSetInflate, &taskSetAttribute ); PS3_ASSERT_CELL_ERROR(ret); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Inflate Taskset created\n" ); #endif ret = cellSpursEventFlagInitializeIWL( s_pSpurs, &s_eventFlagInflate, CELL_SPURS_EVENT_FLAG_CLEAR_AUTO, CELL_SPURS_EVENT_FLAG_SPU2PPU ); PS3_ASSERT_CELL_ERROR(ret); ret = cellSpursEventFlagAttachLv2EventQueue( &s_eventFlagInflate ); PS3_ASSERT_CELL_ERROR(ret); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Event flag created\n" ); #endif ////////////////////////////////////////////////////////////////////////// // // Initialize Inflate Queue. // This will hold the queue of work that the Inflate Task(s) on SPU will // pull work from. // ////////////////////////////////////////////////////////////////////////// uint32_t inflateQueueBuffSize = edgeZlibGetInflateQueueSize( kMaxNumInflateQueueEntries ); s_pInflateQueueBuffer = memalign( EDGE_ZLIB_INFLATE_QUEUE_ALIGN, inflateQueueBuffSize ); s_inflateQueue = edgeZlibCreateInflateQueue( s_pSpurs, kMaxNumInflateQueueEntries, s_pInflateQueueBuffer, inflateQueueBuffSize ); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Inflate Queue created\n" ); #endif ////////////////////////////////////////////////////////////////////////// // // Build Inflate Tasks. // We want the compression to be able to run in parallel on multiple // SPUs so we create as many tasks as SPUs that we want it to run // on (kNumInflateTasks). // ////////////////////////////////////////////////////////////////////////// for( uint32_t taskNum = 0 ; taskNum < kNumInflateTasks ; taskNum++ ) { uint32_t contextSaveSize = edgeZlibGetInflateTaskContextSaveSize(); s_pInflateTaskContext[taskNum] = memalign( CELL_SPURS_TASK_CONTEXT_ALIGN, contextSaveSize ); edgeZlibCreateInflateTask( &s_taskSetInflate, s_pInflateTaskContext[taskNum], s_inflateQueue ); } #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: %d Inflate Task(s) started\n", kNumInflateTasks ); #endif } } bool EdgeZLib::Compress(void* pDestination, uint32_t* pDestSize, const void* pSource, uint32_t SrcSize) { EnterCriticalSection(&s_critSect); ////////////////////////////////////////////////////////////////////////// // // Add one item to the Deflate Queue. // The Deflate Task will wake up and process this work. // ////////////////////////////////////////////////////////////////////////// uint32_t* pDst = NULL; bool findingSizeOnly = false; if(pDestination == NULL && *pDestSize == 0) { pDst = (uint32_t*)malloc(SrcSize); findingSizeOnly = true; *pDestSize = SrcSize; } else { pDst = ((uint32_t*)pDestination); } pDst[0] = SrcSize; // 4 byte header added for source size s_numElementsToCompress = 1; //Must be set correctly before any elements are added uint16_t eventFlagBits = 1; uint32_t compressionLevel = 9; static uint32_t s_compressedSize; //Cannot be on stack // create one task queue entry (max 64K deflate per entry) edgeZlibAddDeflateQueueElement( s_deflateQueue, pSource, SrcSize, &pDst[1], *pDestSize, &s_compressedSize, &s_numElementsToCompress, //This will be decremented by 1 when this element is compressed &s_eventFlagDeflate, //When s_numElementsToCompress decrements to zero, this event will be triggered eventFlagBits, compressionLevel, kEdgeZlibDeflateTask_DeflateStoreCompressed ); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Deflate element(s) added to queue\n" ); printf_verbose( "PPU: Wait for event flag acknowledgment\n" ); #endif ////////////////////////////////////////////////////////////////////////// // // Wait for the event flag to be acknowledged in order to tell us the // work is done. // ////////////////////////////////////////////////////////////////////////// int ret = cellSpursEventFlagWait( &s_eventFlagDeflate, &eventFlagBits, CELL_SPURS_EVENT_FLAG_AND ); PS3_ASSERT_CELL_ERROR(ret ); assert( s_numElementsToCompress == 0 ); //Should have reached zero by now #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Edge Zlib event acknowledged\n" ); #endif // ensure Deflate task told us it stored the compressed data assert( (s_compressedSize & kEdgeZlibDeflateTask_HowStoredMask) == kEdgeZlibDeflateTask_CompressedWasStored ); // remove bit from s_compressedSize s_compressedSize &= ~kEdgeZlibDeflateTask_HowStoredMask; ////////////////////////////////////////////////////////////////////////// // // Print stats // ////////////////////////////////////////////////////////////////////////// #ifndef _CONTENT_PACKAGE printf_verbose( "\n uncompSiz compSiz compress%% numTasks\n" ); printf_verbose( "___________________________________________\n" ); #endif float compressionPercent = (1.f - (float)s_compressedSize / (float)SrcSize) * 100.f; #ifndef _CONTENT_PACKAGE printf_verbose( " 0x%05x 0x%05x %8.2f%% %d\n\n", (int)SrcSize, (int)s_compressedSize, compressionPercent, kNumDeflateTasks ); #endif *pDestSize = s_compressedSize + 4; // 4 byte header for size if(findingSizeOnly) free(pDst); LeaveCriticalSection(&s_critSect); return true; } void EdgeZLib::Shutdown() { ////////////////////////////////////////////////////////////////////////// // // Shutdown Deflate Queue, event flag and taskset. // ////////////////////////////////////////////////////////////////////////// #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Shutdown Deflate Queue...\n" ); #endif edgeZlibShutdownDeflateQueue( s_deflateQueue ); int ret = cellSpursEventFlagDetachLv2EventQueue( &s_eventFlagDeflate ); PS3_ASSERT_CELL_ERROR(ret); // shutdown taskSet #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Wait for taskset shutdown...\n" ); #endif ret = cellSpursShutdownTaskset( &s_taskSetDeflate ); PS3_ASSERT_CELL_ERROR(ret); // wait for all tasks to finish ret = cellSpursJoinTaskset( &s_taskSetDeflate ); PS3_ASSERT_CELL_ERROR(ret); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Shutdown taskset completed.\n" ); #endif // free alloc'd buffers for( uint32_t taskNum = 0 ; taskNum < kNumDeflateTasks ; taskNum++ ) { free( s_pDeflateTaskContext[taskNum] ); } free( s_pDeflateQueueBuffer ); ////////////////////////////////////////////////////////////////////////// // // Shutdown Inflate Queue, event flag and taskset. // ////////////////////////////////////////////////////////////////////////// #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Shutdown Inflate Queue...\n" ); #endif edgeZlibShutdownInflateQueue( s_inflateQueue ); ret = cellSpursEventFlagDetachLv2EventQueue( &s_eventFlagInflate ); PS3_ASSERT_CELL_ERROR(ret); // shutdown taskSet #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Wait for taskset shutdown...\n" ); #endif ret = cellSpursShutdownTaskset( &s_taskSetInflate ); PS3_ASSERT_CELL_ERROR(ret); // wait for all tasks to finish ret = cellSpursJoinTaskset( &s_taskSetInflate ); PS3_ASSERT_CELL_ERROR(ret); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Shutdown taskset completed.\n" ); #endif // free alloc'd buffers for( uint32_t taskNum = 0 ; taskNum < kNumInflateTasks ; taskNum++ ) { free( s_pInflateTaskContext[taskNum] ); } free( s_pInflateQueueBuffer ); } bool EdgeZLib::Decompress(void* pDestination, uint32_t* pDestSize, const void* pSource, uint32_t SrcSize) { #if 1 EnterCriticalSection(&s_critSect); const uint32_t* pSrc = (uint32_t*)pSource; uint32_t uncompressedSize = pSrc[0]; assert(uncompressedSize <= (*pDestSize)); ////////////////////////////////////////////////////////////////////////// // // Add one item to the Inflate Queue. // The Inflate Task will wake up and process this work. // ////////////////////////////////////////////////////////////////////////// s_numElementsToDecompress = 1; //Must be set correctly before any elements are added uint16_t eventFlagBits = 1; // create one task queue entry (max 64K inflate per entry) edgeZlibAddInflateQueueElement( s_inflateQueue, &pSrc[1], SrcSize, pDestination, uncompressedSize, &s_numElementsToDecompress, //This will be decremented by 1 when this element is compressed &s_eventFlagInflate, //When s_numElementsToDecompress decrements to zero, this event will be triggered eventFlagBits, kEdgeZlibInflateTask_Inflate ); #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Inflate element(s) added to queue\n" ); printf_verbose( "PPU: Wait for event flag acknowledgment\n" ); #endif ////////////////////////////////////////////////////////////////////////// // // Wait for the event flag to be acknowledged in order to tell us the // work is done. // ////////////////////////////////////////////////////////////////////////// int ret = cellSpursEventFlagWait( &s_eventFlagInflate, &eventFlagBits, CELL_SPURS_EVENT_FLAG_AND ); PS3_ASSERT_CELL_ERROR(ret); assert( s_numElementsToDecompress == 0 ); //Should have reached zero by now #ifndef _CONTENT_PACKAGE printf_verbose( "PPU: Edge Zlib event acknowledged\n" ); #endif *pDestSize = uncompressedSize; LeaveCriticalSection(&s_critSect); return true; #endif // 0 }