diff --git a/examples/common/common.cpp b/examples/common/common.cpp index 2d0fb605a..0235c53de 100644 --- a/examples/common/common.cpp +++ b/examples/common/common.cpp @@ -1589,10 +1589,18 @@ bool SDGenerationParams::from_json_str( LOG_ERROR("invalid init_image"); return false; } + if (!parse_image_json_field(j, "end_image", 3, width, height, end_image)) { + LOG_ERROR("invalid end_image"); + return false; + } if (!parse_image_array_json_field(j, "ref_images", 3, width, height, ref_images)) { LOG_ERROR("invalid ref_images"); return false; } + if (!parse_image_array_json_field(j, "control_frames", 3, width, height, control_frames)) { + LOG_ERROR("invalid control_frames"); + return false; + } if (!parse_image_json_field(j, "mask_image", 1, width, height, mask_image)) { LOG_ERROR("invalid mask_image"); return false; diff --git a/examples/common/media_io.cpp b/examples/common/media_io.cpp index 0b8b3a27b..e2e1ca5a3 100644 --- a/examples/common/media_io.cpp +++ b/examples/common/media_io.cpp @@ -95,6 +95,57 @@ using WebPMuxPtr = std::unique_ptr; using WebPAnimEncoderPtr = std::unique_ptr; #endif +#ifdef SD_USE_WEBM +class MemoryMkvWriter : public mkvmuxer::IMkvWriter { +public: + mkvmuxer::int32 Write(const void* buf, mkvmuxer::uint32 len) override { + if (buf == nullptr && len > 0) { + return -1; + } + const size_t end_pos = position_ + static_cast(len); + if (end_pos > data_.size()) { + data_.resize(end_pos); + } + if (len > 0) { + memcpy(data_.data() + position_, buf, len); + } + position_ = end_pos; + return 0; + } + + mkvmuxer::int64 Position() const override { + return static_cast(position_); + } + + mkvmuxer::int32 Position(mkvmuxer::int64 position) override { + if (position < 0) { + return -1; + } + const size_t target = static_cast(position); + if (target > data_.size()) { + data_.resize(target); + } + position_ = target; + return 0; + } + + bool Seekable() const override { + return true; + } + + void ElementStartNotify(mkvmuxer::uint64, mkvmuxer::int64) 
override { + } + + const std::vector& data() const { + return data_; + } + +private: + std::vector data_; + size_t position_ = 0; +}; +#endif + bool read_binary_file_bytes(const char* path, std::vector& data) { std::ifstream fin(fs::path(path), std::ios::binary); if (!fin) { @@ -570,6 +621,32 @@ void write_u16_le(FILE* f, uint16_t val) { fwrite(&val, 2, 1, f); } +void write_u32_le(std::vector& data, uint32_t val) { + data.push_back(static_cast(val & 0xFF)); + data.push_back(static_cast((val >> 8) & 0xFF)); + data.push_back(static_cast((val >> 16) & 0xFF)); + data.push_back(static_cast((val >> 24) & 0xFF)); +} + +void write_u16_le(std::vector& data, uint16_t val) { + data.push_back(static_cast(val & 0xFF)); + data.push_back(static_cast((val >> 8) & 0xFF)); +} + +void patch_u32_le(std::vector& data, size_t offset, uint32_t val) { + if (offset + 4 > data.size()) { + return; + } + data[offset + 0] = static_cast(val & 0xFF); + data[offset + 1] = static_cast((val >> 8) & 0xFF); + data[offset + 2] = static_cast((val >> 16) & 0xFF); + data[offset + 3] = static_cast((val >> 24) & 0xFF); +} + +void write_fourcc(std::vector& data, const char* fourcc) { + data.insert(data.end(), fourcc, fourcc + 4); +} + EncodedImageFormat encoded_image_format_from_path(const std::string& path) { std::string ext = fs::path(path).extension().string(); std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); @@ -699,95 +776,96 @@ uint8_t* load_image_from_memory(const char* image_bytes, return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel); } -int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { +std::vector create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) { if (num_images == 0) { fprintf(stderr, "Error: Image array is empty.\n"); - return -1; + return {}; } - FilePtr file(fopen(filename, "wb")); - if (!file) { - 
perror("Error opening file for writing"); - return -1; - } - FILE* f = file.get(); - uint32_t width = images[0].width; uint32_t height = images[0].height; uint32_t channels = images[0].channel; if (channels != 3 && channels != 4) { fprintf(stderr, "Error: Unsupported channel count: %u\n", channels); - return -1; - } - - fwrite("RIFF", 4, 1, f); - long riff_size_pos = ftell(f); - write_u32_le(f, 0); - fwrite("AVI ", 4, 1, f); - - fwrite("LIST", 4, 1, f); - write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); - fwrite("hdrl", 4, 1, f); - - fwrite("avih", 4, 1, f); - write_u32_le(f, 56); - write_u32_le(f, 1000000 / fps); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0x110); - write_u32_le(f, num_images); - write_u32_le(f, 0); - write_u32_le(f, 1); - write_u32_le(f, width * height * 3); - write_u32_le(f, width); - write_u32_le(f, height); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0); - - fwrite("LIST", 4, 1, f); - write_u32_le(f, 4 + 8 + 56 + 8 + 40); - fwrite("strl", 4, 1, f); - - fwrite("strh", 4, 1, f); - write_u32_le(f, 56); - fwrite("vids", 4, 1, f); - fwrite("MJPG", 4, 1, f); - write_u32_le(f, 0); - write_u16_le(f, 0); - write_u16_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 1); - write_u32_le(f, fps); - write_u32_le(f, 0); - write_u32_le(f, num_images); - write_u32_le(f, width * height * 3); - write_u32_le(f, (uint32_t)-1); - write_u32_le(f, 0); - write_u16_le(f, 0); - write_u16_le(f, 0); - write_u16_le(f, 0); - write_u16_le(f, 0); - - fwrite("strf", 4, 1, f); - write_u32_le(f, 40); - write_u32_le(f, 40); - write_u32_le(f, width); - write_u32_le(f, height); - write_u16_le(f, 1); - write_u16_le(f, 24); - fwrite("MJPG", 4, 1, f); - write_u32_le(f, width * height * 3); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0); - - fwrite("LIST", 4, 1, f); - long movi_size_pos = ftell(f); - write_u32_le(f, 0); - fwrite("movi", 4, 1, f); + return {}; + } + + // 
stb_image_write changes JPEG sampling behavior above quality 90. + // MJPG AVI playback is more compatible when we keep the encoder on the + // <= 90 path. + const int mjpg_quality = std::clamp(quality, 1, 90); + + std::vector avi_data; + avi_data.reserve(static_cast(num_images) * 1024); + + write_fourcc(avi_data, "RIFF"); + const size_t riff_size_pos = avi_data.size(); + write_u32_le(avi_data, 0); + write_fourcc(avi_data, "AVI "); + + write_fourcc(avi_data, "LIST"); + write_u32_le(avi_data, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); + write_fourcc(avi_data, "hdrl"); + + write_fourcc(avi_data, "avih"); + write_u32_le(avi_data, 56); + write_u32_le(avi_data, 1000000 / fps); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 0x110); + write_u32_le(avi_data, num_images); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 1); + write_u32_le(avi_data, width * height * 3); + write_u32_le(avi_data, width); + write_u32_le(avi_data, height); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 0); + + write_fourcc(avi_data, "LIST"); + write_u32_le(avi_data, 4 + 8 + 56 + 8 + 40); + write_fourcc(avi_data, "strl"); + + write_fourcc(avi_data, "strh"); + write_u32_le(avi_data, 56); + write_fourcc(avi_data, "vids"); + write_fourcc(avi_data, "MJPG"); + write_u32_le(avi_data, 0); + write_u16_le(avi_data, 0); + write_u16_le(avi_data, 0); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 1); + write_u32_le(avi_data, fps); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, num_images); + write_u32_le(avi_data, width * height * 3); + write_u32_le(avi_data, static_cast(-1)); + write_u32_le(avi_data, 0); + write_u16_le(avi_data, 0); + write_u16_le(avi_data, 0); + write_u16_le(avi_data, 0); + write_u16_le(avi_data, 0); + + write_fourcc(avi_data, "strf"); + write_u32_le(avi_data, 40); + write_u32_le(avi_data, 40); + write_u32_le(avi_data, width); + write_u32_le(avi_data, height); + 
write_u16_le(avi_data, 1); + write_u16_le(avi_data, 24); + write_fourcc(avi_data, "MJPG"); + write_u32_le(avi_data, width * height * 3); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 0); + write_u32_le(avi_data, 0); + + write_fourcc(avi_data, "LIST"); + const size_t movi_size_pos = avi_data.size(); + write_u32_le(avi_data, 0); + write_fourcc(avi_data, "movi"); std::vector index(static_cast(num_images)); std::vector jpeg_data; @@ -801,55 +879,61 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int buffer->insert(buffer->end(), src, src + size); }; - if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality)) { + if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, mjpg_quality)) { fprintf(stderr, "Error: Failed to encode JPEG frame.\n"); - return -1; + return {}; } - fwrite("00dc", 4, 1, f); - write_u32_le(f, (uint32_t)jpeg_data.size()); - index[i].offset = ftell(f) - 8; - index[i].size = (uint32_t)jpeg_data.size(); - fwrite(jpeg_data.data(), 1, jpeg_data.size(), f); + index[i].offset = static_cast(avi_data.size()); + write_fourcc(avi_data, "00dc"); + write_u32_le(avi_data, static_cast(jpeg_data.size())); + index[i].size = (uint32_t)jpeg_data.size(); + avi_data.insert(avi_data.end(), jpeg_data.begin(), jpeg_data.end()); if (jpeg_data.size() % 2) { - fputc(0, f); + avi_data.push_back(0); } } - long cur_pos = ftell(f); - long movi_size = cur_pos - movi_size_pos - 4; - fseek(f, movi_size_pos, SEEK_SET); - write_u32_le(f, movi_size); - fseek(f, cur_pos, SEEK_SET); + const size_t movi_size = avi_data.size() - movi_size_pos - 4; + patch_u32_le(avi_data, movi_size_pos, static_cast(movi_size)); - fwrite("idx1", 4, 1, f); - write_u32_le(f, num_images * 16); + write_fourcc(avi_data, "idx1"); + write_u32_le(avi_data, num_images * 16); for (int i = 0; i < num_images; i++) { - 
fwrite("00dc", 4, 1, f); - write_u32_le(f, 0x10); - write_u32_le(f, index[i].offset); - write_u32_le(f, index[i].size); + write_fourcc(avi_data, "00dc"); + write_u32_le(avi_data, 0x10); + write_u32_le(avi_data, index[i].offset); + write_u32_le(avi_data, index[i].size); } - cur_pos = ftell(f); - long file_size = cur_pos - riff_size_pos - 4; - fseek(f, riff_size_pos, SEEK_SET); - write_u32_le(f, file_size); - fseek(f, cur_pos, SEEK_SET); + const size_t file_size = avi_data.size() - riff_size_pos - 4; + patch_u32_le(avi_data, riff_size_pos, static_cast(file_size)); + + return avi_data; +} +int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { + std::vector avi_data = create_mjpg_avi_from_sd_images_to_vector(images, num_images, fps, quality); + if (avi_data.empty()) { + return -1; + } + if (!write_binary_file_bytes(filename, avi_data)) { + perror("Error opening file for writing"); + return -1; + } return 0; } #ifdef SD_USE_WEBP -int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { +std::vector create_animated_webp_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) { if (num_images == 0) { fprintf(stderr, "Error: Image array is empty.\n"); - return -1; + return {}; } if (fps <= 0) { fprintf(stderr, "Error: FPS must be positive.\n"); - return -1; + return {}; } const int width = static_cast(images[0].width); @@ -857,14 +941,14 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images const int channels = static_cast(images[0].channel); if (channels != 1 && channels != 3 && channels != 4) { fprintf(stderr, "Error: Unsupported channel count: %d\n", channels); - return -1; + return {}; } WebPAnimEncoderOptions anim_options; WebPConfig config; if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) { fprintf(stderr, "Error: Failed to initialize WebP animation 
encoder.\n"); - return -1; + return {}; } config.quality = static_cast(quality); @@ -875,13 +959,13 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images } if (!WebPValidateConfig(&config)) { fprintf(stderr, "Error: Invalid WebP encoder configuration.\n"); - return -1; + return {}; } WebPAnimEncoderPtr enc(WebPAnimEncoderNew(width, height, &anim_options)); if (enc == nullptr) { fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n"); - return -1; + return {}; } const int frame_duration_ms = std::max(1, static_cast(std::lround(1000.0 / static_cast(fps)))); @@ -891,13 +975,13 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images const sd_image_t& image = images[i]; if (static_cast(image.width) != width || static_cast(image.height) != height) { fprintf(stderr, "Error: Frame dimensions do not match.\n"); - return -1; + return {}; } WebPPictureGuard picture; if (!picture.initialized) { fprintf(stderr, "Error: Failed to initialize WebPPicture.\n"); - return -1; + return {}; } picture.picture.use_argb = 1; picture.picture.width = width; @@ -921,12 +1005,12 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images if (!picture_ok) { fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n"); - return -1; + return {}; } if (!WebPAnimEncoderAdd(enc.get(), &picture.picture, timestamp_ms, &config)) { fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc.get())); - return -1; + return {}; } timestamp_ms += frame_duration_ms; @@ -934,52 +1018,50 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images if (!WebPAnimEncoderAdd(enc.get(), nullptr, timestamp_ms, nullptr)) { fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc.get())); - return -1; + return {}; } WebPDataGuard webp_data; if (!WebPAnimEncoderAssemble(enc.get(), &webp_data.data)) { fprintf(stderr, 
"Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc.get())); - return -1; + return {}; } - FilePtr f(fopen(filename, "wb")); - if (!f) { - perror("Error opening file for writing"); + return std::vector(webp_data.data.bytes, webp_data.data.bytes + webp_data.data.size); +} + +int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { + std::vector webp_data = create_animated_webp_from_sd_images_to_vector(images, num_images, fps, quality); + if (webp_data.empty()) { return -1; } - if (webp_data.data.size > 0 && fwrite(webp_data.data.bytes, 1, webp_data.data.size, f.get()) != webp_data.data.size) { - fprintf(stderr, "Error: Failed to write animated WebP file.\n"); + if (!write_binary_file_bytes(filename, webp_data)) { + perror("Error opening file for writing"); return -1; } - return 0; } #endif #ifdef SD_USE_WEBM -int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { +std::vector create_webm_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) { if (num_images == 0) { fprintf(stderr, "Error: Image array is empty.\n"); - return -1; + return {}; } if (fps <= 0) { fprintf(stderr, "Error: FPS must be positive.\n"); - return -1; + return {}; } const int width = static_cast(images[0].width); const int height = static_cast(images[0].height); if (width <= 0 || height <= 0) { fprintf(stderr, "Error: Invalid frame dimensions.\n"); - return -1; + return {}; } - mkvmuxer::MkvWriter writer; - if (!writer.Open(filename)) { - fprintf(stderr, "Error: Could not open WebM file for writing.\n"); - return -1; - } + MemoryMkvWriter writer; const int ret = [&]() -> int { mkvmuxer::Segment segment; @@ -1045,30 +1127,63 @@ int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num } return 0; }(); - writer.Close(); - return ret; + if (ret != 0) { + return {}; + } + return writer.data(); +} + +int 
create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { + std::vector webm_data = create_webm_from_sd_images_to_vector(images, num_images, fps, quality); + if (webm_data.empty()) { + return -1; + } + if (!write_binary_file_bytes(filename, webm_data)) { + perror("Error opening file for writing"); + return -1; + } + return 0; } #endif -int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { - std::string path = filename ? filename : ""; - auto pos = path.find_last_of('.'); - std::string ext = pos == std::string::npos ? "" : path.substr(pos); - for (char& ch : ext) { - ch = static_cast(tolower(static_cast(ch))); +std::vector create_video_from_sd_images_to_vector(const std::string& output_format, + sd_image_t* images, + int num_images, + int fps, + int quality) { + std::string format = output_format; + std::transform(format.begin(), format.end(), format.begin(), + [](unsigned char c) { return static_cast(tolower(c)); }); + if (!format.empty() && format[0] == '.') { + format.erase(format.begin()); } #ifdef SD_USE_WEBM - if (ext == ".webm") { - return create_webm_from_sd_images(filename, images, num_images, fps, quality); + if (format == "webm") { + return create_webm_from_sd_images_to_vector(images, num_images, fps, quality); } #endif #ifdef SD_USE_WEBP - if (ext == ".webp") { - return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality); + if (format == "webp") { + return create_animated_webp_from_sd_images_to_vector(images, num_images, fps, quality); } #endif - return create_mjpg_avi_from_sd_images(filename, images, num_images, fps, quality); + return create_mjpg_avi_from_sd_images_to_vector(images, num_images, fps, quality); +} + +int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { + std::string path = filename ? 
filename : ""; + auto pos = path.find_last_of('.'); + std::string ext = pos == std::string::npos ? "" : path.substr(pos); + std::vector video_data = create_video_from_sd_images_to_vector(ext, images, num_images, fps, quality); + if (video_data.empty()) { + return -1; + } + if (!write_binary_file_bytes(filename, video_data)) { + perror("Error opening file for writing"); + return -1; + } + return 0; } diff --git a/examples/common/media_io.h b/examples/common/media_io.h index e6ca098d9..6b3f6f883 100644 --- a/examples/common/media_io.h +++ b/examples/common/media_io.h @@ -58,6 +58,10 @@ int create_mjpg_avi_from_sd_images(const char* filename, int num_images, int fps, int quality = 90); +std::vector create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images, + int num_images, + int fps, + int quality = 90); #ifdef SD_USE_WEBP int create_animated_webp_from_sd_images(const char* filename, @@ -65,6 +69,10 @@ int create_animated_webp_from_sd_images(const char* filename, int num_images, int fps, int quality = 90); +std::vector create_animated_webp_from_sd_images_to_vector(sd_image_t* images, + int num_images, + int fps, + int quality = 90); #endif #ifdef SD_USE_WEBM @@ -73,6 +81,10 @@ int create_webm_from_sd_images(const char* filename, int num_images, int fps, int quality = 90); +std::vector create_webm_from_sd_images_to_vector(sd_image_t* images, + int num_images, + int fps, + int quality = 90); #endif int create_video_from_sd_images(const char* filename, @@ -80,5 +92,10 @@ int create_video_from_sd_images(const char* filename, int num_images, int fps, int quality = 90); +std::vector create_video_from_sd_images_to_vector(const std::string& output_format, + sd_image_t* images, + int num_images, + int fps, + int quality = 90); #endif // __MEDIA_IO_H__ diff --git a/examples/server/api.md b/examples/server/api.md index 8f8bf9edd..39744dbed 100644 --- a/examples/server/api.md +++ b/examples/server/api.md @@ -9,7 +9,7 @@ The server currently exposes three API families: - `sdcpp 
API` under `/sdcpp/v1/...` The `sdcpp API` is the native API surface. -Its request schema is also the canonical schema for `sd_cpp_extra_args`. +Its request schema is the same schema used by `sd_cpp_extra_args`. Global LoRA rule: @@ -55,8 +55,6 @@ Current endpoints include: - `POST /sdcpp/v1/jobs/{id}/cancel` - `POST /sdcpp/v1/vid_gen` -`POST /sdcpp/v1/vid_gen` is currently exposed but returns `501 Not Implemented`. - ## `sd_cpp_extra_args` `sd_cpp_extra_args` is an extension mechanism for the compatibility APIs. @@ -79,12 +77,12 @@ Behavior: - The JSON block is parsed using the same field rules as the `sdcpp API`. - The block is removed from the final prompt before generation. -Intended use: +Supported use: - extend `OpenAI API` requests with native `stable-diffusion.cpp` controls - extend `sdapi` requests with native `stable-diffusion.cpp` controls -Not intended use: +Unsupported use: - do not use `sd_cpp_extra_args` with `/sdcpp/v1/*` @@ -372,20 +370,25 @@ Field types: Returns frontend-friendly capability metadata. -Typical contents: +The mode-aware fields are the primary interface. The top-level compatibility fields are deprecated mirrors kept for older clients. 
-| Field | Type | -| --- | --- | -| `model` | `object` | -| `defaults` | `object` | -| `loras` | `array` | -| `samplers` | `array` | -| `schedulers` | `array` | -| `output_formats` | `array` | -| `limits` | `object` | -| `features` | `object` | +Top-level fields: -Nested fields currently returned: +| Field | Type | Notes | +| --- | --- | --- | +| `model` | `object` | Loaded model metadata | +| `current_mode` | `string` | The native generation mode mirrored by top-level compatibility fields | +| `supported_modes` | `array` | Supported native modes such as `img_gen` or `vid_gen` | +| `defaults` | `object` | Deprecated compatibility mirror of `defaults_by_mode[current_mode]` | +| `output_formats` | `array` | Deprecated compatibility mirror of `output_formats_by_mode[current_mode]` | +| `features` | `object` | Deprecated compatibility mirror of `features_by_mode[current_mode]` | +| `defaults_by_mode` | `object` | Explicit defaults for each supported mode | +| `output_formats_by_mode` | `object` | Explicit output formats for each supported mode | +| `features_by_mode` | `object` | Explicit feature flags for each supported mode | +| `samplers` | `array` | Available sampling methods | +| `schedulers` | `array` | Available schedulers | +| `loras` | `array` | Available LoRA entries | +| `limits` | `object` | Shared queue and size limits | `model` @@ -395,50 +398,24 @@ Nested fields currently returned: | `model.stem` | `string` | | `model.path` | `string` | -`defaults` +Compatibility rules: + +- `defaults`, `output_formats`, and `features` are deprecated compatibility mirrors +- those three top-level fields always mirror `current_mode` +- `supported_modes`, `defaults_by_mode`, `output_formats_by_mode`, and `features_by_mode` are the mode-aware fields + +Mode-aware objects: | Field | Type | | --- | --- | -| `defaults.prompt` | `string` | -| `defaults.negative_prompt` | `string` | -| `defaults.clip_skip` | `integer` | -| `defaults.width` | `integer` | -| `defaults.height` | 
`integer` | -| `defaults.strength` | `number` | -| `defaults.seed` | `integer` | -| `defaults.batch_count` | `integer` | -| `defaults.auto_resize_ref_image` | `boolean` | -| `defaults.increase_ref_index` | `boolean` | -| `defaults.control_strength` | `number` | -| `defaults.sample_params` | `object` | -| `defaults.sample_params.scheduler` | `string` | -| `defaults.sample_params.sample_method` | `string` | -| `defaults.sample_params.sample_steps` | `integer` | -| `defaults.sample_params.eta` | `number \| null` | -| `defaults.sample_params.shifted_timestep` | `integer` | -| `defaults.sample_params.flow_shift` | `number \| null` | -| `defaults.sample_params.guidance` | `object` | -| `defaults.sample_params.guidance.txt_cfg` | `number` | -| `defaults.sample_params.guidance.img_cfg` | `number \| null` | -| `defaults.sample_params.guidance.distilled_guidance` | `number` | -| `defaults.sample_params.guidance.slg` | `object` | -| `defaults.sample_params.guidance.slg.layers` | `array` | -| `defaults.sample_params.guidance.slg.layer_start` | `number` | -| `defaults.sample_params.guidance.slg.layer_end` | `number` | -| `defaults.sample_params.guidance.slg.scale` | `number` | -| `defaults.vae_tiling_params` | `object` | -| `defaults.vae_tiling_params.enabled` | `boolean` | -| `defaults.vae_tiling_params.tile_size_x` | `integer` | -| `defaults.vae_tiling_params.tile_size_y` | `integer` | -| `defaults.vae_tiling_params.target_overlap` | `number` | -| `defaults.vae_tiling_params.rel_size_x` | `number` | -| `defaults.vae_tiling_params.rel_size_y` | `number` | -| `defaults.cache_mode` | `string` | -| `defaults.cache_option` | `string` | -| `defaults.scm_mask` | `string` | -| `defaults.scm_policy_dynamic` | `boolean` | -| `defaults.output_format` | `string` | -| `defaults.output_compression` | `integer` | +| `defaults_by_mode.img_gen` | `object` | +| `defaults_by_mode.vid_gen` | `object` | +| `output_formats_by_mode.img_gen` | `array` | +| `output_formats_by_mode.vid_gen` | `array` 
| +| `features_by_mode.img_gen` | `object` | +| `features_by_mode.vid_gen` | `object` | + +Shared nested fields: `loras` @@ -458,19 +435,100 @@ Nested fields currently returned: | `limits.max_batch_count` | `integer` | | `limits.max_queue_size` | `integer` | -`features` +Shared default fields used by both `img_gen` and `vid_gen`: + +| Field | Type | +| --- | --- | +| `prompt` | `string` | +| `negative_prompt` | `string` | +| `clip_skip` | `integer` | +| `width` | `integer` | +| `height` | `integer` | +| `strength` | `number` | +| `seed` | `integer` | +| `sample_params` | `object` | +| `sample_params.scheduler` | `string` | +| `sample_params.sample_method` | `string` | +| `sample_params.sample_steps` | `integer` | +| `sample_params.eta` | `number \| null` | +| `sample_params.shifted_timestep` | `integer` | +| `sample_params.flow_shift` | `number \| null` | +| `sample_params.guidance.txt_cfg` | `number` | +| `sample_params.guidance.img_cfg` | `number \| null` | +| `sample_params.guidance.distilled_guidance` | `number` | +| `sample_params.guidance.slg.layers` | `array` | +| `sample_params.guidance.slg.layer_start` | `number` | +| `sample_params.guidance.slg.layer_end` | `number` | +| `sample_params.guidance.slg.scale` | `number` | +| `vae_tiling_params` | `object` | +| `vae_tiling_params.enabled` | `boolean` | +| `vae_tiling_params.tile_size_x` | `integer` | +| `vae_tiling_params.tile_size_y` | `integer` | +| `vae_tiling_params.target_overlap` | `number` | +| `vae_tiling_params.rel_size_x` | `number` | +| `vae_tiling_params.rel_size_y` | `number` | +| `cache_mode` | `string` | +| `cache_option` | `string` | +| `scm_mask` | `string` | +| `scm_policy_dynamic` | `boolean` | +| `output_format` | `string` | +| `output_compression` | `integer` | + +`img_gen`-specific default fields: + +| Field | Type | +| --- | --- | +| `batch_count` | `integer` | +| `auto_resize_ref_image` | `boolean` | +| `increase_ref_index` | `boolean` | +| `control_strength` | `number` | + 
+`vid_gen`-specific default fields: | Field | Type | | --- | --- | -| `features.init_image` | `boolean` | -| `features.mask_image` | `boolean` | -| `features.control_image` | `boolean` | -| `features.ref_images` | `boolean` | -| `features.lora` | `boolean` | -| `features.vae_tiling` | `boolean` | -| `features.cache` | `boolean` | -| `features.cancel_queued` | `boolean` | -| `features.cancel_generating` | `boolean` | +| `video_frames` | `integer` | +| `fps` | `integer` | +| `moe_boundary` | `number` | +| `vace_strength` | `number` | +| `high_noise_sample_params` | `object` | +| `high_noise_sample_params.scheduler` | `string` | +| `high_noise_sample_params.sample_method` | `string` | +| `high_noise_sample_params.sample_steps` | `integer` | +| `high_noise_sample_params.eta` | `number \| null` | +| `high_noise_sample_params.shifted_timestep` | `integer` | +| `high_noise_sample_params.flow_shift` | `number \| null` | +| `high_noise_sample_params.guidance.txt_cfg` | `number` | +| `high_noise_sample_params.guidance.img_cfg` | `number \| null` | +| `high_noise_sample_params.guidance.distilled_guidance` | `number` | +| `high_noise_sample_params.guidance.slg.layers` | `array` | +| `high_noise_sample_params.guidance.slg.layer_start` | `number` | +| `high_noise_sample_params.guidance.slg.layer_end` | `number` | +| `high_noise_sample_params.guidance.slg.scale` | `number` | + +Fields returned in `features_by_mode.img_gen`: + +- `init_image` +- `mask_image` +- `control_image` +- `ref_images` +- `lora` +- `vae_tiling` +- `cache` +- `cancel_queued` +- `cancel_generating` + +Fields returned in `features_by_mode.vid_gen`: + +- `init_image` +- `end_image` +- `control_frames` +- `high_noise_sample_params` +- `lora` +- `vae_tiling` +- `cache` +- `cancel_queued` +- `cancel_generating` #### `POST /sdcpp/v1/img_gen` @@ -521,9 +579,7 @@ Typical status codes: - `409 Conflict` - `410 Gone` -### Canonical Request Schema - -The `sdcpp API` request body is the canonical native schema. 
+### Request Body Example: @@ -612,7 +668,7 @@ Channel expectations: If omitted or null: - single-image fields map to an empty `sd_image_t` -- array fields map to `nullptr + count = 0` +- array fields map to an empty C-style array, represented as `pointer = nullptr` and `count = 0` ### Field Mapping Summary @@ -686,11 +742,11 @@ HTTP-only output fields: | `output_format` | `string` | | `output_compression` | `integer` | -### Optional Field Semantics +### Optional Field Handling -Clients should preserve unset semantics for optional sampling fields. +Optional sampling fields may be omitted. -If a user has not explicitly provided one of these fields, the client should omit it instead of injecting a guessed fallback: +When omitted, backend defaults apply to these fields: - `sample_params.scheduler` - `sample_params.sample_method` @@ -766,29 +822,394 @@ Example cancelled job: } ``` -### Validation and Retention +### Submission Errors -Recommended behavior: +`POST /sdcpp/v1/img_gen` may return: -- malformed JSON returns `400` -- invalid image payloads return `400` -- invalid parameter structure returns `400` -- queue full returns `429` or `503` -- accepted runtime failures transition the job to `failed` -- unsupported in-progress cancellation may return `409` +- `202 Accepted` when the job is created +- `400 Bad Request` for an empty body, unsupported model mode, invalid JSON, or invalid generation parameters +- `429 Too Many Requests` when the job queue is full +- `500 Internal Server Error` for unexpected server exceptions during submission -Recommended retention controls: +### `vid_gen` -- pending job limit -- completed job TTL -- failed job TTL +The following section documents the native async contract for video generation. -### Future `vid_gen` +#### `POST /sdcpp/v1/vid_gen` -Future `vid_gen` should reuse the same async job model: +Submits an async video generation job. 
+ -- `POST /sdcpp/v1/vid_gen` -- `GET /sdcpp/v1/jobs/{id}` -- `POST /sdcpp/v1/jobs/{id}/cancel` +Successful submission returns `202 Accepted`. + +Example response: + +```json +{ + "id": "job_01HTXYZVID", + "kind": "vid_gen", + "status": "queued", + "created": 1775401200, + "poll_url": "/sdcpp/v1/jobs/job_01HTXYZVID" +} +``` + +Response fields: + +| Field | Type | +| --- | --- | +| `id` | `string` | +| `kind` | `string` | +| `status` | `string` | +| `created` | `integer` | +| `poll_url` | `string` | + +### Request Body + +The `vid_gen` request body differs from the `img_gen` request body as follows: + +- `vid_gen` is a single video sequence job, so `batch_count` is not part of the request schema +- `ref_images`, `mask_image`, `control_image`, `control_strength`, and `embed_image_metadata` are not part of the request schema +- `vid_gen` adds `end_image`, `control_frames`, `high_noise_sample_params`, `video_frames`, `fps`, `moe_boundary`, and `vace_strength` + +Example: + +```json +{ + "prompt": "a cat walking through a rainy alley", + "negative_prompt": "", + "clip_skip": -1, + "width": 832, + "height": 480, + "strength": 0.75, + "seed": -1, + "video_frames": 33, + "fps": 16, + "moe_boundary": 0.875, + "vace_strength": 1.0, + + "init_image": null, + "end_image": null, + "control_frames": [], + + "sample_params": { + "scheduler": "discrete", + "sample_method": "euler", + "sample_steps": 28, + "eta": 1.0, + "shifted_timestep": 0, + "custom_sigmas": [], + "flow_shift": 0.0, + "guidance": { + "txt_cfg": 7.0, + "img_cfg": 7.0, + "distilled_guidance": 3.5, + "slg": { + "layers": [7, 8, 9], + "layer_start": 0.01, + "layer_end": 0.2, + "scale": 0.0 + } + } + }, + + "high_noise_sample_params": { + "scheduler": "discrete", + "sample_method": "euler", + "sample_steps": -1, + "eta": 1.0, + "shifted_timestep": 0, + "flow_shift": 0.0, + "guidance": { + "txt_cfg": 7.0, + "img_cfg": 7.0, + "distilled_guidance": 3.5, + "slg": { + "layers": [7, 8, 9], + "layer_start": 0.01, + "layer_end": 0.2, + "scale": 0.0 + } 
+ } + }, + + "lora": [], + + "vae_tiling_params": { + "enabled": false, + "tile_size_x": 0, + "tile_size_y": 0, + "target_overlap": 0.5, + "rel_size_x": 0.0, + "rel_size_y": 0.0 + }, + + "cache_mode": "disabled", + "cache_option": "", + "scm_mask": "", + "scm_policy_dynamic": true, + + "output_format": "webm", + "output_compression": 100 +} +``` + +### LoRA Rules + +- The server only accepts explicit LoRA entries from the `lora` field. +- Prompt-embedded `<lora:...>` tags are intentionally unsupported. +- `lora[].is_high_noise` controls whether a LoRA applies only to the high-noise stage. + +### Image and Frame Encoding Rules + +Any image field accepts: + +- a raw base64 string, or +- a data URL such as `data:image/png;base64,...` + +Channel expectations: + +- `init_image`: 3 channels +- `end_image`: 3 channels +- `control_frames[]`: 3 channels + +Frame ordering rules: + +- `control_frames[]` order is the conditioning frame order +- `control_frames[]` is preserved in request order + +If omitted or null: + +- single-image fields map to an empty `sd_image_t` +- array fields map to an empty C-style array, represented as `pointer = nullptr` and `count = 0` + +### Field Mapping Summary + +Top-level scalar fields: + +| Field | Type | +| --- | --- | +| `prompt` | `string` | +| `negative_prompt` | `string` | +| `clip_skip` | `integer` | +| `width` | `integer` | +| `height` | `integer` | +| `strength` | `number` | +| `seed` | `integer` | +| `video_frames` | `integer` | +| `fps` | `integer` | +| `moe_boundary` | `number` | +| `vace_strength` | `number` | + +Image and frame fields: + +| Field | Type | +| --- | --- | +| `init_image` | `string \| null` | +| `end_image` | `string \| null` | +| `control_frames` | `array` | + +LoRA fields: + +| Field | Type | +| --- | --- | +| `lora[].path` | `string` | +| `lora[].multiplier` | `number` | +| `lora[].is_high_noise` | `boolean` | + +Sampling fields: + +| Field | Type | +| --- | --- | +| `sample_params.scheduler` | `string` | +| 
`sample_params.sample_method` | `string` | +| `sample_params.sample_steps` | `integer` | +| `sample_params.eta` | `number` | +| `sample_params.shifted_timestep` | `integer` | +| `sample_params.custom_sigmas` | `array` | +| `sample_params.flow_shift` | `number` | +| `sample_params.guidance.txt_cfg` | `number` | +| `sample_params.guidance.img_cfg` | `number` | +| `sample_params.guidance.distilled_guidance` | `number` | +| `sample_params.guidance.slg.layers` | `array` | +| `sample_params.guidance.slg.layer_start` | `number` | +| `sample_params.guidance.slg.layer_end` | `number` | +| `sample_params.guidance.slg.scale` | `number` | + +High-noise sampling fields: + +| Field | Type | +| --- | --- | +| `high_noise_sample_params.scheduler` | `string` | +| `high_noise_sample_params.sample_method` | `string` | +| `high_noise_sample_params.sample_steps` | `integer` | +| `high_noise_sample_params.eta` | `number` | +| `high_noise_sample_params.shifted_timestep` | `integer` | +| `high_noise_sample_params.flow_shift` | `number` | +| `high_noise_sample_params.guidance.txt_cfg` | `number` | +| `high_noise_sample_params.guidance.img_cfg` | `number` | +| `high_noise_sample_params.guidance.distilled_guidance` | `number` | +| `high_noise_sample_params.guidance.slg.layers` | `array` | +| `high_noise_sample_params.guidance.slg.layer_start` | `number` | +| `high_noise_sample_params.guidance.slg.layer_end` | `number` | +| `high_noise_sample_params.guidance.slg.scale` | `number` | + +Other native fields: + +| Field | Type | +| --- | --- | +| `vae_tiling_params` | `object` | +| `cache_mode` | `string` | +| `cache_option` | `string` | +| `scm_mask` | `string` | +| `scm_policy_dynamic` | `boolean` | + +HTTP-only output fields: + +| Field | Type | +| --- | --- | +| `output_format` | `string` | +| `output_compression` | `integer` | + +For `vid_gen`, `output_format` and `output_compression` control container encoding. 
+`fps` is request metadata for the generated sequence and is echoed in the completed job result. + +Allowed `output_format` values: + +- `webm` +- `webp` +- `avi` + +Output format behavior: + +- `output_format` defaults to `webm` +- `webp` means animated WebP +- `avi` means MJPG AVI +- `webm` requires the server to be built with WebM support; otherwise the request returns `400` + +### Result Payload + +Completed jobs return one encoded container payload, not a list of per-frame images. + +Result fields: + +- `result.b64_json` contains the whole encoded container file as base64 +- `result.mime_type` identifies the media type +- `result.output_format` echoes the selected container format +- `result.fps` echoes the effective playback FPS +- `result.frame_count` reports the actual decoded frame count used to build the container + +Expected MIME types: + +| `output_format` | `mime_type` | +| --- | --- | +| `webm` | `video/webm` | +| `webp` | `image/webp` | +| `avi` | `video/x-msvideo` | + +### Optional Field Handling + +Optional sampling fields may be omitted. + +When omitted, backend defaults apply to these fields: + +- `sample_params.scheduler` +- `sample_params.sample_method` +- `sample_params.eta` +- `sample_params.flow_shift` +- `sample_params.guidance.img_cfg` +- `high_noise_sample_params.scheduler` +- `high_noise_sample_params.sample_method` +- `high_noise_sample_params.eta` +- `high_noise_sample_params.flow_shift` +- `high_noise_sample_params.guidance.img_cfg` + +`high_noise_sample_params` may also be omitted entirely. + +### Frame Count Semantics + +`video_frames` is the requested target length, but the current core video path internally normalizes the effective frame count to the largest `4n + 1` value that does not exceed the requested count. + +Examples: + +- `video_frames = 33` stays `33` +- `video_frames = 34` becomes `33` +- `video_frames = 32` becomes `29` + +The completed job payload includes the actual decoded `frame_count`. 
+ +### Completion Result + +Example completed job: + +```json +{ + "id": "job_01HTXYZVID", + "kind": "vid_gen", + "status": "completed", + "created": 1775401200, + "started": 1775401203, + "completed": 1775401215, + "queue_position": 0, + "result": { + "output_format": "webm", + "mime_type": "video/webm", + "fps": 16, + "frame_count": 33, + "b64_json": "GkXfo59ChoEBQveBAULygQRC84EIQo..." + }, + "error": null +} +``` + +The response returns the encoded `.webm`, animated `.webp`, or `.avi` container payload directly. + +### Failure Result + +Example failed job: + +```json +{ + "id": "job_01HTXYZVID", + "kind": "vid_gen", + "status": "failed", + "created": 1775401200, + "started": 1775401203, + "completed": 1775401204, + "queue_position": 0, + "result": null, + "error": { + "code": "generation_failed", + "message": "generate_video returned no results" + } +} +``` + +### Cancelled Result + +Example cancelled job: + +```json +{ + "id": "job_01HTXYZVID", + "kind": "vid_gen", + "status": "cancelled", + "created": 1775401200, + "started": null, + "completed": 1775401202, + "queue_position": 0, + "result": null, + "error": { + "code": "cancelled", + "message": "job cancelled by client" + } +} +``` + +### Submission Errors + +`POST /sdcpp/v1/vid_gen` may return: -Its request body should mirror `sd_vid_gen_params_t` in the same way that `img_gen` mirrors `sd_img_gen_params_t`. 
+- `202 Accepted` when the job is created +- `400 Bad Request` for an empty body, unsupported model mode, invalid JSON, invalid generation parameters, or an unsupported output format +- `429 Too Many Requests` when the job queue is full +- `500 Internal Server Error` for unexpected server exceptions during submission diff --git a/examples/server/async_jobs.cpp b/examples/server/async_jobs.cpp index 39c47cfaa..e8e9d8ada 100644 --- a/examples/server/async_jobs.cpp +++ b/examples/server/async_jobs.cpp @@ -95,8 +95,12 @@ bool cancel_queued_job(AsyncJobManager& manager, AsyncGenerationJob& job) { job.status = AsyncJobStatus::Cancelled; job.completed_at = unix_timestamp_now(); job.result_images_b64.clear(); - job.error_code = "cancelled"; - job.error_message = "job cancelled by client"; + job.result_media_b64.clear(); + job.result_media_mime_type.clear(); + job.result_frame_count = 0; + job.result_fps = 0; + job.error_code = "cancelled"; + job.error_message = "job cancelled by client"; return true; } @@ -122,14 +126,24 @@ json make_async_job_json(const AsyncJobManager& manager, const AsyncGenerationJo } if (job.status == AsyncJobStatus::Completed) { - json images = json::array(); - for (size_t i = 0; i < job.result_images_b64.size(); ++i) { - images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}}); + if (job.kind == AsyncJobKind::VidGen) { + result["result"] = { + {"output_format", job.vid_gen.output_format}, + {"mime_type", job.result_media_mime_type}, + {"fps", job.result_fps}, + {"frame_count", job.result_frame_count}, + {"b64_json", job.result_media_b64}, + }; + } else { + json images = json::array(); + for (size_t i = 0; i < job.result_images_b64.size(); ++i) { + images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}}); + } + result["result"] = { + {"output_format", job.img_gen.output_format}, + {"images", images}, + }; } - result["result"] = { - {"output_format", job.img_gen.output_format}, - {"images", images}, - }; result["error"] 
= nullptr; } else if (job.status == AsyncJobStatus::Failed || job.status == AsyncJobStatus::Cancelled) { @@ -156,16 +170,15 @@ bool execute_img_gen_job(ServerRuntime& runtime, sd_img_gen_params_t params = job.img_gen.to_sd_img_gen_params_t(); SDImageVec results; - int num_results = 0; { std::lock_guard lock(*runtime.sd_ctx_mutex); sd_image_t* raw_results = generate_image(runtime.sd_ctx, ¶ms); - num_results = params.batch_count; - results.adopt(raw_results, num_results); + results.adopt(raw_results, params.batch_count); } - if (results.empty() || num_results <= 0) { + const int num_results = results.count(); + if (num_results <= 0) { error_message = "generate_image returned no results"; return false; } @@ -208,6 +221,47 @@ bool execute_img_gen_job(ServerRuntime& runtime, return true; } +bool execute_vid_gen_job(ServerRuntime& runtime, + AsyncGenerationJob& job, + std::string& output_media_b64, + std::string& output_media_mime_type, + int& output_frame_count, + int& output_fps, + std::string& error_message) { + sd_vid_gen_params_t params = job.vid_gen.to_sd_vid_gen_params_t(); + + SDImageVec results; + int num_results = 0; + + { + std::lock_guard lock(*runtime.sd_ctx_mutex); + sd_image_t* raw_results = generate_video(runtime.sd_ctx, ¶ms, &num_results); + results.adopt(raw_results, num_results); + } + + num_results = results.count(); + if (num_results <= 0) { + error_message = "generate_video returned no results"; + return false; + } + + std::vector video_bytes = create_video_from_sd_images_to_vector(job.vid_gen.output_format, + results.data(), + num_results, + job.vid_gen.gen_params.fps, + job.vid_gen.output_compression); + if (video_bytes.empty()) { + error_message = "failed to encode generated video container"; + return false; + } + + output_media_b64 = base64_encode(video_bytes); + output_media_mime_type = video_mime_type(job.vid_gen.output_format); + output_frame_count = num_results; + output_fps = job.vid_gen.gen_params.fps; + return true; +} + void 
async_job_worker(ServerRuntime& runtime) { AsyncJobManager& manager = *runtime.async_job_manager; @@ -240,11 +294,23 @@ void async_job_worker(ServerRuntime& runtime) { } std::vector output_images; + std::string output_media_b64; + std::string output_media_mime_type; + int output_frame_count = 0; + int output_fps = 0; std::string error_message; bool ok = false; if (job->kind == AsyncJobKind::ImgGen) { ok = execute_img_gen_job(runtime, *job, output_images, error_message); + } else if (job->kind == AsyncJobKind::VidGen) { + ok = execute_vid_gen_job(runtime, + *job, + output_media_b64, + output_media_mime_type, + output_frame_count, + output_fps, + error_message); } else { error_message = "unsupported job kind"; } @@ -258,8 +324,12 @@ void async_job_worker(ServerRuntime& runtime) { job->completed_at = unix_timestamp_now(); if (ok) { - job->status = AsyncJobStatus::Completed; - job->result_images_b64 = std::move(output_images); + job->status = AsyncJobStatus::Completed; + job->result_images_b64 = std::move(output_images); + job->result_media_b64 = std::move(output_media_b64); + job->result_media_mime_type = std::move(output_media_mime_type); + job->result_frame_count = output_frame_count; + job->result_fps = output_fps; job->error_code.clear(); job->error_message.clear(); } else { @@ -267,6 +337,10 @@ void async_job_worker(ServerRuntime& runtime) { job->error_code = "generation_failed"; job->error_message = error_message.empty() ? 
"unknown generation error" : error_message; job->result_images_b64.clear(); + job->result_media_b64.clear(); + job->result_media_mime_type.clear(); + job->result_frame_count = 0; + job->result_fps = 0; } purge_expired_jobs(manager); diff --git a/examples/server/async_jobs.h b/examples/server/async_jobs.h index cb90bdd8e..89997a3b4 100644 --- a/examples/server/async_jobs.h +++ b/examples/server/async_jobs.h @@ -36,7 +36,12 @@ struct AsyncGenerationJob { int64_t started_at = 0; int64_t completed_at = 0; ImgGenJobRequest img_gen; + VidGenJobRequest vid_gen; std::vector result_images_b64; + std::string result_media_b64; + std::string result_media_mime_type; + int result_frame_count = 0; + int result_fps = 0; std::string error_code; std::string error_message; }; @@ -63,4 +68,11 @@ bool execute_img_gen_job(ServerRuntime& runtime, AsyncGenerationJob& job, std::vector& output_images, std::string& error_message); +bool execute_vid_gen_job(ServerRuntime& runtime, + AsyncGenerationJob& job, + std::string& output_media_b64, + std::string& output_media_mime_type, + int& output_frame_count, + int& output_fps, + std::string& error_message); void async_job_worker(ServerRuntime& runtime); diff --git a/examples/server/frontend b/examples/server/frontend index 740475a7a..797ccf808 160000 --- a/examples/server/frontend +++ b/examples/server/frontend @@ -1 +1 @@ -Subproject commit 740475a7a6794dc07fb23e8ec5dc56e7e80aa8c1 +Subproject commit 797ccf80825cc035508ba9b599b2a21953e7f835 diff --git a/examples/server/routes_openai.cpp b/examples/server/routes_openai.cpp index af1210459..ce6215d1e 100644 --- a/examples/server/routes_openai.cpp +++ b/examples/server/routes_openai.cpp @@ -253,6 +253,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { svr.Post("/v1/images/generations", [runtime](const httplib::Request& req, httplib::Response& res) { try { + if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) { + res.status = 400; + 
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json"); + return; + } + ImgGenJobRequest request; std::string error_message; if (!build_openai_generation_request(req, *runtime, request, error_message)) { @@ -319,6 +325,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { svr.Post("/v1/images/edits", [runtime](const httplib::Request& req, httplib::Response& res) { try { + if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) { + res.status = 400; + res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json"); + return; + } + ImgGenJobRequest request; std::string error_message; if (!build_openai_edit_request(req, *runtime, request, error_message)) { diff --git a/examples/server/routes_sdapi.cpp b/examples/server/routes_sdapi.cpp index ca6661c0b..63c89ec8b 100644 --- a/examples/server/routes_sdapi.cpp +++ b/examples/server/routes_sdapi.cpp @@ -246,6 +246,11 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { res.set_content(R"({"error":"empty body"})", "application/json"); return; } + if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) { + res.status = 400; + res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json"); + return; + } json j = json::parse(req.body); ImgGenJobRequest request; diff --git a/examples/server/routes_sdcpp.cpp b/examples/server/routes_sdcpp.cpp index 930033bbd..8119136a4 100644 --- a/examples/server/routes_sdcpp.cpp +++ b/examples/server/routes_sdcpp.cpp @@ -75,18 +75,122 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) { return {}; } +static json make_sample_params_json(const sd_sample_params_t& sample_params, const std::vector& skip_layers) { + const auto& guidance = sample_params.guidance; + return { + {"scheduler", capability_scheduler_name(sample_params.scheduler)}, + {"sample_method", 
capability_sample_method_name(sample_params.sample_method)}, + {"sample_steps", sample_params.sample_steps}, + {"eta", finite_number_or_null(sample_params.eta)}, + {"shifted_timestep", sample_params.shifted_timestep}, + {"flow_shift", finite_number_or_null(sample_params.flow_shift)}, + {"guidance", + { + {"txt_cfg", guidance.txt_cfg}, + {"img_cfg", finite_number_or_null(guidance.img_cfg)}, + {"distilled_guidance", guidance.distilled_guidance}, + {"slg", + { + {"layers", skip_layers}, + {"layer_start", guidance.slg.layer_start}, + {"layer_end", guidance.slg.layer_end}, + {"scale", guidance.slg.scale}, + }}, + }}, + }; +} + +static json make_img_gen_defaults_json(const SDGenerationParams& defaults, const std::string& output_format) { + return { + {"prompt", defaults.prompt}, + {"negative_prompt", defaults.negative_prompt}, + {"clip_skip", defaults.clip_skip}, + {"width", defaults.width > 0 ? defaults.width : 512}, + {"height", defaults.height > 0 ? defaults.height : 512}, + {"strength", defaults.strength}, + {"seed", defaults.seed}, + {"batch_count", defaults.batch_count}, + {"auto_resize_ref_image", defaults.auto_resize_ref_image}, + {"increase_ref_index", defaults.increase_ref_index}, + {"control_strength", defaults.control_strength}, + {"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)}, + {"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)}, + {"cache_mode", defaults.cache_mode}, + {"cache_option", defaults.cache_option}, + {"scm_mask", defaults.scm_mask}, + {"scm_policy_dynamic", defaults.scm_policy_dynamic}, + {"output_format", output_format}, + {"output_compression", 100}, + }; +} + +static json make_vid_gen_defaults_json(const SDGenerationParams& defaults, const std::string& output_format) { + return { + {"prompt", defaults.prompt}, + {"negative_prompt", defaults.negative_prompt}, + {"clip_skip", defaults.clip_skip}, + {"width", defaults.width > 0 ? 
defaults.width : 512}, + {"height", defaults.height > 0 ? defaults.height : 512}, + {"strength", defaults.strength}, + {"seed", defaults.seed}, + {"video_frames", defaults.video_frames}, + {"fps", defaults.fps}, + {"moe_boundary", defaults.moe_boundary}, + {"vace_strength", defaults.vace_strength}, + {"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)}, + {"high_noise_sample_params", make_sample_params_json(defaults.high_noise_sample_params, defaults.high_noise_skip_layers)}, + {"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)}, + {"cache_mode", defaults.cache_mode}, + {"cache_option", defaults.cache_option}, + {"scm_mask", defaults.scm_mask}, + {"scm_policy_dynamic", defaults.scm_policy_dynamic}, + {"output_format", output_format}, + {"output_compression", 100}, + }; +} + +static json make_img_gen_features_json() { + return { + {"init_image", true}, + {"mask_image", true}, + {"control_image", true}, + {"ref_images", true}, + {"lora", true}, + {"vae_tiling", true}, + {"cache", true}, + {"cancel_queued", true}, + {"cancel_generating", false}, + }; +} + +static json make_vid_gen_features_json() { + return { + {"init_image", true}, + {"end_image", true}, + {"control_frames", true}, + {"high_noise_sample_params", true}, + {"lora", true}, + {"vae_tiling", true}, + {"cache", true}, + {"cancel_queued", true}, + {"cancel_generating", false}, + }; +} + static json make_capabilities_json(ServerRuntime& runtime) { refresh_lora_cache(runtime); AsyncJobManager& manager = *runtime.async_job_manager; const auto& defaults = *runtime.default_gen_params; - const auto& sample_params = defaults.sample_params; - const auto& guidance = sample_params.guidance; const fs::path model_path = resolve_display_model_path(runtime); + const bool supports_img = runtime_supports_generation_mode(runtime, IMG_GEN); + const bool supports_vid = runtime_supports_generation_mode(runtime, VID_GEN); json samplers = json::array(); json schedulers = 
json::array(); - json output_formats = json::array({"png", "jpeg"}); + json image_output_formats = supported_img_output_formats(); + json video_output_formats = supported_vid_output_formats(); json available_loras = json::array(); + json supported_modes = json::array(); for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) { samplers.push_back(sd_sample_method_name((sample_method_t)i)); @@ -96,10 +200,6 @@ static json make_capabilities_json(ServerRuntime& runtime) { schedulers.push_back(sd_scheduler_name((scheduler_t)i)); } -#ifdef SD_USE_WEBP - output_formats.push_back("webp"); -#endif - { std::lock_guard lock(*runtime.lora_mutex); for (const auto& entry : *runtime.lora_cache) { @@ -110,77 +210,80 @@ static json make_capabilities_json(ServerRuntime& runtime) { } } + if (supports_img) { + supported_modes.push_back("img_gen"); + } + if (supports_vid) { + supported_modes.push_back("vid_gen"); + } + + std::string default_img_output_format = "png"; + std::string default_vid_output_format = "avi"; + if (!image_output_formats.empty()) { + default_img_output_format = image_output_formats[0].get(); + } + if (!video_output_formats.empty()) { + default_vid_output_format = video_output_formats[0].get(); + } + + json defaults_by_mode = json::object(); + json output_formats_by_mode = json::object(); + json features_by_mode = json::object(); + if (supports_img) { + defaults_by_mode["img_gen"] = make_img_gen_defaults_json(defaults, default_img_output_format); + output_formats_by_mode["img_gen"] = image_output_formats; + features_by_mode["img_gen"] = make_img_gen_features_json(); + } + if (supports_vid) { + defaults_by_mode["vid_gen"] = make_vid_gen_defaults_json(defaults, default_vid_output_format); + output_formats_by_mode["vid_gen"] = video_output_formats; + features_by_mode["vid_gen"] = make_vid_gen_features_json(); + } + + json top_level_defaults = json::object(); + json top_level_output_formats = json::array(); + json top_level_features = { + {"cancel_queued", true}, + 
{"cancel_generating", false}, + }; + std::string current_mode = ""; + if (supports_img) { + current_mode = "img_gen"; + top_level_defaults = defaults_by_mode["img_gen"]; + top_level_output_formats = output_formats_by_mode["img_gen"]; + top_level_features = features_by_mode["img_gen"]; + } else if (supports_vid) { + current_mode = "vid_gen"; + top_level_defaults = defaults_by_mode["vid_gen"]; + top_level_output_formats = output_formats_by_mode["vid_gen"]; + top_level_features = features_by_mode["vid_gen"]; + } + json result; result["model"] = { {"name", model_path.filename().u8string()}, {"stem", model_path.stem().u8string()}, {"path", model_path.u8string()}, }; - result["defaults"] = { - {"prompt", defaults.prompt}, - {"negative_prompt", defaults.negative_prompt}, - {"clip_skip", defaults.clip_skip}, - {"width", defaults.width > 0 ? defaults.width : 512}, - {"height", defaults.height > 0 ? defaults.height : 512}, - {"strength", defaults.strength}, - {"seed", defaults.seed}, - {"batch_count", defaults.batch_count}, - {"auto_resize_ref_image", defaults.auto_resize_ref_image}, - {"increase_ref_index", defaults.increase_ref_index}, - {"control_strength", defaults.control_strength}, - {"sample_params", - { - {"scheduler", capability_scheduler_name(sample_params.scheduler)}, - {"sample_method", capability_sample_method_name(sample_params.sample_method)}, - {"sample_steps", sample_params.sample_steps}, - {"eta", finite_number_or_null(sample_params.eta)}, - {"shifted_timestep", sample_params.shifted_timestep}, - {"flow_shift", finite_number_or_null(sample_params.flow_shift)}, - {"guidance", - { - {"txt_cfg", guidance.txt_cfg}, - {"img_cfg", finite_number_or_null(guidance.img_cfg)}, - {"distilled_guidance", guidance.distilled_guidance}, - {"slg", - { - {"layers", defaults.skip_layers}, - {"layer_start", guidance.slg.layer_start}, - {"layer_end", guidance.slg.layer_end}, - {"scale", guidance.slg.scale}, - }}, - }}, - }}, - {"vae_tiling_params", 
make_vae_tiling_json(defaults.vae_tiling_params)}, - {"cache_mode", defaults.cache_mode}, - {"cache_option", defaults.cache_option}, - {"scm_mask", defaults.scm_mask}, - {"scm_policy_dynamic", defaults.scm_policy_dynamic}, - {"output_format", "png"}, - {"output_compression", 100}, - }; - result["limits"] = { - {"min_width", 64}, - {"max_width", 4096}, - {"min_height", 64}, - {"max_height", 4096}, - {"max_batch_count", 8}, - {"max_queue_size", manager.max_pending_jobs}, + result["current_mode"] = current_mode; + result["supported_modes"] = supported_modes; + result["defaults"] = top_level_defaults; + result["defaults_by_mode"] = defaults_by_mode; + result["limits"] = { + {"min_width", 64}, + {"max_width", 4096}, + {"min_height", 64}, + {"max_height", 4096}, + {"max_batch_count", 8}, + {"max_queue_size", manager.max_pending_jobs}, }; - result["samplers"] = samplers; - result["schedulers"] = schedulers; - result["output_formats"] = output_formats; - result["features"] = { - {"init_image", true}, - {"mask_image", true}, - {"control_image", true}, - {"ref_images", true}, - {"lora", true}, - {"vae_tiling", true}, - {"cache", true}, - {"cancel_queued", true}, - {"cancel_generating", false}, - }; - result["loras"] = available_loras; + result["samplers"] = samplers; + result["schedulers"] = schedulers; + result["output_formats"] = top_level_output_formats; + result["output_formats_by_mode"] = output_formats_by_mode; + result["features"] = top_level_features; + result["features_by_mode"] = features_by_mode; + result["loras"] = available_loras; return result; } @@ -211,6 +314,33 @@ static bool parse_img_gen_request(const json& body, return true; } +static bool parse_vid_gen_request(const json& body, + ServerRuntime& runtime, + VidGenJobRequest& request, + std::string& error_message) { + request.gen_params = *runtime.default_gen_params; + + refresh_lora_cache(runtime); + if (!request.gen_params.from_json_str(body.dump(), [&](const std::string& path) { + return 
get_lora_full_path(runtime, path); + })) { + error_message = "invalid generation parameters"; + return false; + } + + std::string output_format = body.value("output_format", "webm"); + int output_compression = body.value("output_compression", 100); + if (!assign_output_options(request, output_format, output_compression, error_message)) { + return false; + } + // Intentionally disable prompt-embedded LoRA tag parsing for server APIs. + if (!request.gen_params.resolve_and_validate(VID_GEN, "", true)) { + error_message = "invalid generation parameters"; + return false; + } + return true; +} + void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { ServerRuntime* runtime = &rt; @@ -226,6 +356,11 @@ void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { res.set_content(R"({"error":"empty body"})", "application/json"); return; } + if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) { + res.status = 400; + res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json"); + return; + } json body = json::parse(req.body); ImgGenJobRequest request; @@ -276,9 +411,66 @@ void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { } }); - svr.Post("/sdcpp/v1/vid_gen", [](const httplib::Request&, httplib::Response& res) { - res.status = 501; - res.set_content(R"({"error":"vid_gen is reserved and not implemented yet"})", "application/json"); + svr.Post("/sdcpp/v1/vid_gen", [runtime](const httplib::Request& req, httplib::Response& res) { + try { + if (req.body.empty()) { + res.status = 400; + res.set_content(R"({"error":"empty body"})", "application/json"); + return; + } + if (!runtime_supports_generation_mode(*runtime, VID_GEN)) { + res.status = 400; + res.set_content(json({{"error", unsupported_generation_mode_error(VID_GEN)}}).dump(), "application/json"); + return; + } + + json body = json::parse(req.body); + VidGenJobRequest request; + std::string error_message; + if 
(!parse_vid_gen_request(body, *runtime, request, error_message)) { + res.status = 400; + res.set_content(json({{"error", error_message}}).dump(), "application/json"); + return; + } + + AsyncJobManager& manager = *runtime->async_job_manager; + std::shared_ptr job = std::make_shared(); + job->kind = AsyncJobKind::VidGen; + job->status = AsyncJobStatus::Queued; + job->created_at = unix_timestamp_now(); + job->vid_gen = std::move(request); + + { + std::lock_guard lock(manager.mutex); + purge_expired_jobs(manager); + if (count_pending_jobs(manager) >= manager.max_pending_jobs) { + res.status = 429; + res.set_content(R"({"error":"job queue is full"})", "application/json"); + return; + } + job->id = make_async_job_id(manager); + manager.jobs[job->id] = job; + manager.queue.push_back(job->id); + } + + manager.cv.notify_one(); + + json out; + out["id"] = job->id; + out["kind"] = async_job_kind_name(job->kind); + out["status"] = async_job_status_name(job->status); + out["created"] = job->created_at; + out["poll_url"] = "/sdcpp/v1/jobs/" + job->id; + + res.status = 202; + res.set_content(out.dump(), "application/json"); + } catch (const json::parse_error& e) { + res.status = 400; + res.set_content(json({{"error", "invalid json"}, {"message", e.what()}}).dump(), "application/json"); + } catch (const std::exception& e) { + res.status = 500; + res.set_content(json({{"error", "server_error"}, {"message", e.what()}}).dump(), "application/json"); + } }); svr.Get(R"(/sdcpp/v1/jobs/([A-Za-z0-9_\-]+))", [runtime](const httplib::Request& req, httplib::Response& res) { diff --git a/examples/server/runtime.cpp b/examples/server/runtime.cpp index c29799e3a..39880a182 100644 --- a/examples/server/runtime.cpp +++ b/examples/server/runtime.cpp @@ -45,6 +45,44 @@ std::string normalize_output_format(std::string output_format) { return output_format; } +std::vector supported_img_output_formats(bool allow_webp) { + std::vector formats = {"png", "jpeg"}; +#ifdef SD_USE_WEBP + if (allow_webp) { + 
formats.push_back("webp"); + } +#else + (void)allow_webp; +#endif + return formats; +} + +std::vector supported_vid_output_formats() { + std::vector formats; +#ifdef SD_USE_WEBM + formats.push_back("webm"); +#endif +#ifdef SD_USE_WEBP + formats.push_back("webp"); +#endif + formats.push_back("avi"); + return formats; +} + +static std::string valid_vid_output_formats_message() { + const std::vector formats = supported_vid_output_formats(); + + std::string message = "invalid output_format, must be one of ["; + for (size_t i = 0; i < formats.size(); ++i) { + if (i > 0) { + message += ", "; + } + message += formats[i]; + } + message += "]"; + return message; +} + bool assign_output_options(ImgGenJobRequest& request, std::string output_format, int output_compression, @@ -53,19 +91,88 @@ bool assign_output_options(ImgGenJobRequest& request, request.output_format = normalize_output_format(std::move(output_format)); request.output_compression = std::clamp(output_compression, 0, 100); - const bool valid_format = request.output_format == "png" || - request.output_format == "jpeg" || - (allow_webp && request.output_format == "webp"); + const std::vector valid_formats = supported_img_output_formats(allow_webp); + const bool valid_format = std::find(valid_formats.begin(), + valid_formats.end(), + request.output_format) != valid_formats.end(); if (!valid_format) { - error_message = allow_webp - ? 
"invalid output_format, must be one of [png, jpeg, webp]" - : "invalid output_format, must be one of [png, jpeg]"; + error_message = "invalid output_format, must be one of ["; + for (size_t i = 0; i < valid_formats.size(); ++i) { + if (i > 0) { + error_message += ", "; + } + error_message += valid_formats[i]; + } + error_message += "]"; return false; } return true; } +bool assign_output_options(VidGenJobRequest& request, + std::string output_format, + int output_compression, + std::string& error_message) { + request.output_format = normalize_output_format(std::move(output_format)); + request.output_compression = std::clamp(output_compression, 0, 100); + + if (request.output_format == "avi") { + return true; + } + + if (request.output_format == "webm") { +#ifdef SD_USE_WEBM + return true; +#else + error_message = valid_vid_output_formats_message(); + return false; +#endif + } + + if (request.output_format == "webp") { +#ifdef SD_USE_WEBP + return true; +#else + error_message = valid_vid_output_formats_message(); + return false; +#endif + } + + error_message = valid_vid_output_formats_message(); + return false; +} + +std::string video_mime_type(const std::string& output_format) { + if (output_format == "webm") { + return "video/webm"; + } + if (output_format == "webp") { + return "image/webp"; + } + return "video/x-msvideo"; +} + +bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode) { + if (mode == VID_GEN) { + return sd_ctx_supports_video_generation(runtime.sd_ctx); + } + if (mode == IMG_GEN) { + return sd_ctx_supports_image_generation(runtime.sd_ctx); + } + return true; +} + +std::string unsupported_generation_mode_error(SDMode mode) { + if (mode == VID_GEN) { + return "loaded model does not support vid_gen"; + } + if (mode == IMG_GEN) { + return "loaded model does not support img_gen"; + } + return "loaded model does not support requested mode"; +} + ArgOptions SDSvrParams::get_options() { ArgOptions options; diff --git 
a/examples/server/runtime.h b/examples/server/runtime.h index 65e932439..1970e7dbc 100644 --- a/examples/server/runtime.h +++ b/examples/server/runtime.h @@ -58,13 +58,32 @@ struct ImgGenJobRequest { } }; +struct VidGenJobRequest { + SDGenerationParams gen_params; + std::string output_format = "webm"; + int output_compression = 100; + + sd_vid_gen_params_t to_sd_vid_gen_params_t() { + return gen_params.to_sd_vid_gen_params_t(); + } +}; + std::string base64_encode(const std::vector& bytes); std::string normalize_output_format(std::string output_format); +std::vector supported_img_output_formats(bool allow_webp = true); +std::vector supported_vid_output_formats(); bool assign_output_options(ImgGenJobRequest& request, std::string output_format, int output_compression, bool allow_webp, std::string& error_message); +bool assign_output_options(VidGenJobRequest& request, + std::string output_format, + int output_compression, + std::string& error_message); +std::string video_mime_type(const std::string& output_format); +bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode); +std::string unsupported_generation_mode_error(SDMode mode); void refresh_lora_cache(ServerRuntime& rt); std::string get_lora_full_path(ServerRuntime& rt, const std::string& path); int64_t unix_timestamp_now(); diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h index 64ca401a4..a99b10450 100644 --- a/include/stable-diffusion.h +++ b/include/stable-diffusion.h @@ -348,6 +348,8 @@ SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data); SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data); SD_API int32_t sd_get_num_physical_cores(); SD_API const char* sd_get_system_info(); +SD_API bool sd_ctx_supports_image_generation(const sd_ctx_t* sd_ctx); +SD_API bool sd_ctx_supports_video_generation(const sd_ctx_t* sd_ctx); SD_API const char* sd_type_name(enum sd_type_t type); SD_API 
enum sd_type_t str_to_sd_type(const char* str); diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index 42ed2d46e..62558fdd0 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -2388,6 +2388,14 @@ struct sd_ctx_t { StableDiffusionGGML* sd = nullptr; }; +static bool sd_version_supports_video_generation(SDVersion version) { + return version == VERSION_SVD || sd_version_is_wan(version); +} + +static bool sd_version_supports_image_generation(SDVersion version) { + return !sd_version_supports_video_generation(version); +} + sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) { sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t)); if (sd_ctx == nullptr) { @@ -2417,6 +2425,20 @@ void free_sd_ctx(sd_ctx_t* sd_ctx) { free(sd_ctx); } +SD_API bool sd_ctx_supports_image_generation(const sd_ctx_t* sd_ctx) { + if (sd_ctx == nullptr || sd_ctx->sd == nullptr) { + return false; + } + return sd_version_supports_image_generation(sd_ctx->sd->version); +} + +SD_API bool sd_ctx_supports_video_generation(const sd_ctx_t* sd_ctx) { + if (sd_ctx == nullptr || sd_ctx->sd == nullptr) { + return false; + } + return sd_version_supports_video_generation(sd_ctx->sd->version); +} + enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx) { if (sd_ctx != nullptr && sd_ctx->sd != nullptr) { if (sd_version_is_dit(sd_ctx->sd->version)) {