createSpeech(SpeechRequest requestBody) {
+ return this.restClient.post().uri(ApiConstants.TTS_URL).body(requestBody).retrieve().toEntity(byte[].class);
+ }
+
+ /**
+ * Streams audio generated from the input text.
+ *
+ * This method sends a POST request to the OpenAI API to generate audio from the
+ * provided text. The audio is streamed back as a Flux of ResponseEntity objects, each
+ * containing a byte array of the audio data.
+ * @param requestBody The request body containing the details for the audio
+ * generation, such as the input text, model, voice, and response format.
+ * @return A Flux of ResponseEntity objects, each containing a byte array of the audio
+ * data.
+ */
+ public Flux> stream(SpeechRequest requestBody) {
+
+ return this.webClient.post()
+ .uri(ApiConstants.TTS_URL)
+ .body(Mono.just(requestBody), SpeechRequest.class)
+ .accept(MediaType.APPLICATION_OCTET_STREAM)
+ .exchangeToFlux(clientResponse -> {
+ HttpHeaders headers = clientResponse.headers().asHttpHeaders();
+ return clientResponse.bodyToFlux(byte[].class)
+ .map(bytes -> ResponseEntity.ok().headers(headers).body(bytes));
+ });
+ }
+
+
+ /**
+  * Requests a transcription of the given audio file.
+  * <p>
+  * Builds a multipart body with the parts {@code file}, {@code model} and (when set)
+  * {@code temperature}, and POSTs it to {@code ApiConstants.ASR_URL} through the
+  * blocking {@code RestClient}.
+  * @param requestBody The audio bytes, file name, model and temperature to send.
+  * @return Response entity containing the decoded {@link StructuredResponse}.
+  */
+ public ResponseEntity<StructuredResponse> createTranscription(TranscriptionRequest requestBody) {
+
+     MultiValueMap<String, Object> multipartBody = new LinkedMultiValueMap<>();
+     // Override getFilename() so the file part reports the caller-supplied file name.
+     multipartBody.add("file", new ByteArrayResource(requestBody.file()) {
+
+         @Override
+         public String getFilename() {
+             return requestBody.fileName();
+         }
+     });
+     multipartBody.add("model", requestBody.model());
+     // temperature is optional (the builder does not require it); skip the part when unset.
+     if (requestBody.temperature() != null) {
+         multipartBody.add("temperature", requestBody.temperature());
+     }
+
+     return this.restClient.post()
+         .uri(ApiConstants.ASR_URL)
+         .body(multipartBody)
+         .retrieve()
+         .toEntity(StructuredResponse.class);
+ }
+
+
+ /**
+ * Request to generate audio from the input text. Serialized as the JSON body of the
+ * speech request; null components are omitted ({@code Include.NON_NULL}).
+ *
+ * NOTE(review): the model names, voices, limits and defaults listed below appear to
+ * be carried over from the OpenAI "Create Speech" reference, while the nested Builder
+ * defaults to the model "cogtts" and the response format "pcm" - confirm the values
+ * against the target service.
+ *
+ * @param model The model to use for generating the audio. One of the available Audio
+ * models: audio-1, audio-1-hd, or gpt-4o-mini-audio.
+ * @param input The input text to synthesize. Must be at most 4096 tokens long.
+ * @param voice The voice to use for synthesis. One of the available voices for the
+ * chosen model: 'alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova',
+ * 'sage', 'shimmer', and 'verse'.
+ * @param responseFormat The audio format to return. Supported formats are mp3, opus,
+ * aac, flac, wav, and pcm. Defaults to mp3.
+ * @param speed The speed of the voice synthesis. The acceptable range is from 0.25
+ * (slowest) to 4.0 (fastest). Does not work with gpt-4o-mini-audio.
+ */
+ @JsonInclude(Include.NON_NULL)
+ public record SpeechRequest(
+ @JsonProperty("model") String model,
+ @JsonProperty("input") String input,
+ @JsonProperty("voice") String voice,
+ @JsonProperty("response_format") String responseFormat,
+ @JsonProperty("speed") Double speed) {
+ // @formatter:on
+
+ /**
+ * Creates a new builder pre-populated with the default model and response format.
+ * @return a fresh {@link Builder}.
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+
+
+ /**
+ * Builder for the SpeechRequest. Fields that are never set stay null, except
+ * {@code model} and {@code responseFormat}, which have defaults.
+ */
+ public static final class Builder {
+
+ // Model sent when model(...) is not called.
+ private String model = "cogtts";
+
+ private String input;
+
+ private String voice;
+
+ // Response format sent when responseFormat(...) is not called.
+ private String responseFormat = "pcm";
+
+ private Double speed;
+
+ public Builder model(String model) {
+ this.model = model;
+ return this;
+ }
+
+ public Builder input(String input) {
+ this.input = input;
+ return this;
+ }
+
+ public Builder voice(String voice) {
+ this.voice = voice;
+ return this;
+ }
+
+ public Builder responseFormat(String responseFormat) {
+ this.responseFormat = responseFormat;
+ return this;
+ }
+
+ public Builder speed(Double speed) {
+ this.speed = speed;
+ return this;
+ }
+
+ /**
+ * Builds the request from the current builder state; no validation is performed.
+ * @return a new {@link SpeechRequest}.
+ */
+ public SpeechRequest build() {
+
+ return new SpeechRequest(this.model, this.input, this.voice, this.responseFormat, this.speed);
+ }
+
+ }
+
+ }
+
+ /**
+ * Request to transcribe an audio file.
+ *
+ * @param file The raw bytes of the audio file to transcribe.
+ * @param fileName The file name reported for the {@code file} multipart part by
+ * {@code createTranscription}; it carries no {@code @JsonProperty} annotation.
+ * @param model The model to use for the transcription.
+ * @param temperature Optional temperature value forwarded with the request.
+ */
+ @JsonInclude(Include.NON_NULL)
+ public record TranscriptionRequest(
+ // @formatter:off
+ @JsonProperty("file") byte[] file,
+ String fileName,
+ @JsonProperty("model") String model,
+ @JsonProperty("temperature") Float temperature
+ ) {
+ /**
+ * Creates a new, empty builder.
+ * @return a fresh {@link Builder}.
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+
+ /**
+ * Builder for the TranscriptionRequest. {@code file} and {@code model} are
+ * mandatory and checked in {@link #build()}; the other fields may stay null.
+ */
+ public static final class Builder {
+
+ private byte[] file;
+
+ private String fileName;
+
+ private String model;
+
+ private Float temperature;
+
+ public Builder file(byte[] file) {
+ this.file = file;
+ return this;
+ }
+
+ public Builder fileName(String fileName) {
+ this.fileName = fileName;
+ return this;
+ }
+
+ public Builder model(String model) {
+ this.model = model;
+ return this;
+ }
+
+
+ public Builder temperature(Float temperature) {
+ this.temperature = temperature;
+ return this;
+ }
+
+ /**
+ * Builds the request after validating the mandatory fields.
+ * @return a new {@link TranscriptionRequest}.
+ * @throws IllegalArgumentException if {@code file} is null or {@code model} has
+ * no text.
+ */
+ public TranscriptionRequest build() {
+ Assert.notNull(this.file, "file must not be null");
+ Assert.hasText(this.model, "model must not be empty");
+
+ return new TranscriptionRequest(this.file, this.fileName, this.model, this.temperature);
+ }
+
+ }
+
+ }
+
+ /**
+  * Structured transcription result decoded from the JSON response of
+  * {@code createTranscription}. Unknown JSON properties are ignored.
+  *
+  * @param language The language reported for the input audio.
+  * @param duration The duration reported for the input audio.
+  * @param text The transcribed text.
+  * @param words Extracted words and their corresponding timestamps.
+  * @param segments Segments of the transcribed text and their corresponding details.
+  */
+ @JsonInclude(Include.NON_NULL)
+ @JsonIgnoreProperties(ignoreUnknown = true)
+ public record StructuredResponse(
+     // @formatter:off
+     @JsonProperty("language") String language,
+     @JsonProperty("duration") Float duration,
+     @JsonProperty("text") String text,
+     @JsonProperty("words") List<Word> words,
+     @JsonProperty("segments") List<Segment> segments) {
+     // @formatter:on
+
+     /**
+      * Extracted word and its corresponding timestamps.
+      *
+      * @param word The text content of the word.
+      * @param start The start time of the word in seconds.
+      * @param end The end time of the word in seconds.
+      */
+     @JsonInclude(Include.NON_NULL)
+     @JsonIgnoreProperties(ignoreUnknown = true)
+     public record Word(
+         // @formatter:off
+         @JsonProperty("word") String word,
+         @JsonProperty("start") Float start,
+         @JsonProperty("end") Float end) {
+         // @formatter:on
+     }
+
+     /**
+      * Segment of the transcribed text and its corresponding details.
+      *
+      * @param id Unique identifier of the segment.
+      * @param seek Seek offset of the segment.
+      * @param start Start time of the segment in seconds.
+      * @param end End time of the segment in seconds.
+      * @param text The text content of the segment.
+      * @param tokens Array of token IDs for the text content.
+      * @param temperature Temperature parameter used for generating the segment.
+      * @param avgLogprob Average logprob of the segment. If the value is lower than
+      * -1, consider the logprobs failed.
+      * @param compressionRatio Compression ratio of the segment. If the value is
+      * greater than 2.4, consider the compression failed.
+      * @param noSpeechProb Probability of no speech in the segment. If the value is
+      * higher than 1.0 and the avg_logprob is below -1, consider this segment silent.
+      */
+     @JsonInclude(Include.NON_NULL)
+     @JsonIgnoreProperties(ignoreUnknown = true)
+     public record Segment(
+         // @formatter:off
+         @JsonProperty("id") Integer id,
+         @JsonProperty("seek") Integer seek,
+         @JsonProperty("start") Float start,
+         @JsonProperty("end") Float end,
+         @JsonProperty("text") String text,
+         @JsonProperty("tokens") List<Integer> tokens,
+         @JsonProperty("temperature") Float temperature,
+         @JsonProperty("avg_logprob") Float avgLogprob,
+         @JsonProperty("compression_ratio") Float compressionRatio,
+         @JsonProperty("no_speech_prob") Float noSpeechProb) {
+         // @formatter:on
+     }
+
+ }
+
+
+
+ /**
+ * Fluent builder for {@link AudioApi}. Every setter rejects null (or, for the base
+ * URL, blank) arguments; only the API key has no default and must be supplied
+ * before {@link #build()} is called.
+ */
+ public static final class Builder {
+
+ // Base URL used when baseUrl(...) is not called.
+ private String baseUrl = ApiConstants.DEFAULT_BASE_URL;
+
+ // No default: build() fails unless one of the apiKey(...) overloads was called.
+ private ApiKey apiKey;
+
+ private HttpHeaders headers = new HttpHeaders();
+
+ private RestClient.Builder restClientBuilder = RestClient.builder();
+
+ private WebClient.Builder webClientBuilder = WebClient.builder();
+
+ private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER;
+
+ /**
+ * Sets the API base URL.
+ * @param baseUrl the base URL; must contain text.
+ * @return this builder.
+ */
+ public Builder baseUrl(String baseUrl) {
+ Assert.hasText(baseUrl, "baseUrl cannot be null or empty");
+ this.baseUrl = baseUrl;
+ return this;
+ }
+
+ /**
+ * Sets the API key.
+ * @param apiKey the API key; must not be null.
+ * @return this builder.
+ */
+ public Builder apiKey(ApiKey apiKey) {
+ Assert.notNull(apiKey, "apiKey cannot be null");
+ this.apiKey = apiKey;
+ return this;
+ }
+
+ /**
+ * Sets the API key from a plain string by wrapping it in a {@link SimpleApiKey}.
+ * @param simpleApiKey the API key value; must not be null.
+ * @return this builder.
+ */
+ public Builder apiKey(String simpleApiKey) {
+ Assert.notNull(simpleApiKey, "simpleApiKey cannot be null");
+ this.apiKey = new SimpleApiKey(simpleApiKey);
+ return this;
+ }
+
+ /**
+ * Sets the HTTP headers handed to the {@link AudioApi} constructor.
+ * @param headers the headers; must not be null.
+ * @return this builder.
+ */
+ public Builder headers(HttpHeaders headers) {
+ Assert.notNull(headers, "headers cannot be null");
+ this.headers = headers;
+ return this;
+ }
+
+ /**
+ * Sets the {@link RestClient.Builder} handed to the {@link AudioApi} constructor.
+ * @param restClientBuilder the builder; must not be null.
+ * @return this builder.
+ */
+ public Builder restClientBuilder(RestClient.Builder restClientBuilder) {
+ Assert.notNull(restClientBuilder, "restClientBuilder cannot be null");
+ this.restClientBuilder = restClientBuilder;
+ return this;
+ }
+
+ /**
+ * Sets the {@link WebClient.Builder} handed to the {@link AudioApi} constructor.
+ * @param webClientBuilder the builder; must not be null.
+ * @return this builder.
+ */
+ public Builder webClientBuilder(WebClient.Builder webClientBuilder) {
+ Assert.notNull(webClientBuilder, "webClientBuilder cannot be null");
+ this.webClientBuilder = webClientBuilder;
+ return this;
+ }
+
+ /**
+ * Sets the response error handler handed to the {@link AudioApi} constructor.
+ * @param responseErrorHandler the handler; must not be null.
+ * @return this builder.
+ */
+ public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) {
+ Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null");
+ this.responseErrorHandler = responseErrorHandler;
+ return this;
+ }
+
+ /**
+ * Creates the {@link AudioApi} from the configured values.
+ * @return a new {@link AudioApi}.
+ * @throws IllegalArgumentException if no API key was set.
+ */
+ public AudioApi build() {
+ Assert.notNull(this.apiKey, "apiKey must be set");
+ return new AudioApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder,
+ this.webClientBuilder, this.responseErrorHandler);
+ }
+
+ }
+
+}
diff --git a/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/audio/AudioModel.java b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/audio/AudioModel.java
new file mode 100644
index 0000000000000000000000000000000000000000..92b64a515a53019c70bfdf500cd8cca4018e04df
--- /dev/null
+++ b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/audio/AudioModel.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2023-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package xyz.thoughtset.viewer.models.zhipuai.api.audio;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.audio.tts.*;
+import org.springframework.http.ResponseEntity;
+import org.springframework.retry.support.RetryTemplate;
+import org.springframework.util.Assert;
+import org.springframework.util.StringUtils;
+import reactor.core.publisher.Flux;
+
+import java.util.List;
+
+@Slf4j
+public class AudioModel implements TextToSpeechModel {
+
+ /**
+ * The default options used for the audio completion requests.
+ */
+ private final AudioOptions defaultOptions;
+
+ /**
+ * The retry template used to retry the OpenAI Audio API calls.
+ */
+ private final RetryTemplate retryTemplate;
+
+ /**
+ * Low-level access to the OpenAI Audio API.
+ */
+ private final AudioApi audioApi;
+
+
+ /**
+ * Initializes a new instance of the AudioModel class with the provided
+ * OpenAiAudioApi and options.
+ * @param audioApi The OpenAiAudioApi to use for speech synthesis.
+ * @param options The AudioOptions containing the speech synthesis
+ * options.
+ * @param retryTemplate The retry template.
+ */
+ public AudioModel(AudioApi audioApi, AudioOptions options,
+ RetryTemplate retryTemplate) {
+ Assert.notNull(audioApi, "OpenAiAudioApi must not be null");
+ Assert.notNull(options, "OpenAiSpeechOptions must not be null");
+ Assert.notNull(options, "RetryTemplate must not be null");
+ this.audioApi = audioApi;
+ this.defaultOptions = options;
+ this.retryTemplate = retryTemplate;
+ }
+
+ @Override
+ public byte[] call(String text) {
+ TextToSpeechPrompt prompt = new TextToSpeechPrompt(text);
+ return call(prompt).getResult().getOutput();
+ }
+
+ @Override
+ public TextToSpeechResponse call(TextToSpeechPrompt prompt) {
+
+ AudioApi.SpeechRequest speechRequest = createRequest(prompt);
+
+ ResponseEntity speechEntity = this.retryTemplate.execute(
+ (ctx) -> this.audioApi.createSpeech(speechRequest));
+
+ var speech = speechEntity.getBody();
+
+ return new TextToSpeechResponse(List.of(new Speech(speech)));
+ }
+
+ /**
+ * Streams the audio response for the given speech prompt.
+ * @param prompt The speech prompt containing the text and options for speech
+ * synthesis.
+ * @return A Flux of TextToSpeechResponse objects containing the streamed audio and
+ * metadata.
+ */
+ @Override
+ public Flux stream(TextToSpeechPrompt prompt) {
+
+ AudioApi.SpeechRequest speechRequest = createRequest(prompt);
+
+ Flux> speechEntity = this.retryTemplate.execute(
+ (ctx) -> this.audioApi.stream(speechRequest));
+
+ return speechEntity.map(entity -> new TextToSpeechResponse(List.of(new Speech(entity.getBody()))));
+ }
+
+ private AudioApi.SpeechRequest createRequest(TextToSpeechPrompt prompt) {
+ AudioOptions options = this.defaultOptions;
+
+ String input = StringUtils.hasText(options.getInput()) ? options.getInput()
+ : prompt.getInstructions().getText();
+
+ AudioApi.SpeechRequest.Builder requestBuilder = AudioApi.SpeechRequest.builder()
+ .model(options.getModel())
+ .input(input)
+ .voice(options.getVoice())
+ .responseFormat(options.getResponseFormat())
+ .speed(options.getSpeed());
+
+ return requestBuilder.build();
+ }
+
+ @Override
+ public TextToSpeechOptions getDefaultOptions() {
+ return this.defaultOptions;
+ }
+
+ private AudioOptions merge(AudioOptions source, AudioOptions target) {
+ AudioOptions.Builder mergedBuilder = AudioOptions.builder();
+
+ mergedBuilder.model(source.getModel() != null ? source.getModel() : target.getModel());
+ mergedBuilder.input(source.getInput() != null ? source.getInput() : target.getInput());
+ mergedBuilder.voice(source.getVoice() != null ? source.getVoice() : target.getVoice());
+ mergedBuilder.responseFormat(
+ source.getResponseFormat() != null ? source.getResponseFormat() : target.getResponseFormat());
+ mergedBuilder.speed(source.getSpeed() != null ? source.getSpeed() : target.getSpeed());
+
+ return mergedBuilder.build();
+ }
+
+}
diff --git a/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/audio/AudioOptions.java b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/audio/AudioOptions.java
new file mode 100644
index 0000000000000000000000000000000000000000..27e50243ffe509c9dab402c3d38beb81d99531c5
--- /dev/null
+++ b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/audio/AudioOptions.java
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2023-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package xyz.thoughtset.viewer.models.zhipuai.api.audio;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.Data;
+import org.springframework.ai.audio.tts.TextToSpeechOptions;
+
+ /**
+  * Options for text-to-speech requests. Lombok's {@code @Data} supplies the accessors;
+  * {@code equals}, {@code hashCode} and {@code toString} are written out explicitly
+  * and cover every field, including {@code watermark}.
+  */
+ @Data
+ @JsonInclude(JsonInclude.Include.NON_NULL)
+ public class AudioOptions implements TextToSpeechOptions {
+
+     /**
+      * The model to use for generating the audio.
+      */
+     @JsonProperty("model")
+     private String model;
+
+     /**
+      * The input text to synthesize.
+      */
+     @JsonProperty("input")
+     private String input;
+
+     /**
+      * The voice to use for synthesis.
+      */
+     @JsonProperty("voice")
+     private String voice;
+
+     /**
+      * The format of the audio output, serialized as {@code response_format}.
+      * NOTE(review): the original comment listed mp3, opus, aac and flac with mp3 as
+      * default - confirm against the target service.
+      */
+     @JsonProperty("response_format")
+     private String responseFormat;
+
+     /**
+      * The speed of the voice synthesis. The acceptable range is from 0.25 (slowest) to
+      * 4.0 (fastest). Defaults to 1 (normal)
+      */
+     @JsonProperty("speed")
+     private Double speed;
+
+     /**
+      * Watermark flag, serialized as {@code watermark_enabled}.
+      */
+     @JsonProperty("watermark_enabled")
+     private boolean watermark;
+
+     public static Builder builder() {
+         return new Builder();
+     }
+
+     /**
+      * Returns the audio format through the generic {@link TextToSpeechOptions} view.
+      * It mirrors {@code responseFormat} and is excluded from JSON so the value is not
+      * written twice.
+      * @return the configured response format, or null when none is set.
+      */
+     @Override
+     @com.fasterxml.jackson.annotation.JsonIgnore
+     public String getFormat() {
+         return this.responseFormat;
+     }
+
+     /**
+      * Creates an independent copy carrying all option values, including the
+      * watermark flag.
+      * @return a new {@link AudioOptions} equal to this one.
+      */
+     @Override
+     @SuppressWarnings("unchecked")
+     public AudioOptions copy() {
+         return AudioOptions.builder()
+             .model(this.model)
+             .input(this.input)
+             .voice(this.voice)
+             .responseFormat(this.responseFormat)
+             .speed(this.speed)
+             .watermark(this.watermark)
+             .build();
+     }
+
+     @Override
+     public int hashCode() {
+         return java.util.Objects.hash(this.model, this.input, this.voice, this.responseFormat, this.speed,
+                 this.watermark);
+     }
+
+     @Override
+     public boolean equals(Object obj) {
+         if (this == obj) {
+             return true;
+         }
+         if (obj == null || getClass() != obj.getClass()) {
+             return false;
+         }
+         AudioOptions other = (AudioOptions) obj;
+         return this.watermark == other.watermark
+                 && java.util.Objects.equals(this.model, other.model)
+                 && java.util.Objects.equals(this.input, other.input)
+                 && java.util.Objects.equals(this.voice, other.voice)
+                 && java.util.Objects.equals(this.responseFormat, other.responseFormat)
+                 && java.util.Objects.equals(this.speed, other.speed);
+     }
+
+     @Override
+     public String toString() {
+         return "AudioOptions{" + "model='" + this.model + '\'' + ", input='" + this.input + '\''
+                 + ", voice='" + this.voice + '\'' + ", responseFormat='" + this.responseFormat + '\'' + ", speed="
+                 + this.speed + ", watermark=" + this.watermark + '}';
+     }
+
+     /**
+      * Fluent builder for {@link AudioOptions}. The builder mutates a single instance
+      * and {@link #build()} returns that same instance, so a builder should not be
+      * reused after building.
+      */
+     public static final class Builder {
+
+         private final AudioOptions options = new AudioOptions();
+
+         public Builder model(String model) {
+             this.options.model = model;
+             return this;
+         }
+
+         public Builder input(String input) {
+             this.options.input = input;
+             return this;
+         }
+
+         public Builder voice(String voice) {
+             this.options.voice = voice;
+             return this;
+         }
+
+         public Builder responseFormat(String responseFormat) {
+             this.options.responseFormat = responseFormat;
+             return this;
+         }
+
+         public Builder speed(Double speed) {
+             this.options.speed = speed;
+             return this;
+         }
+
+         public Builder watermark(boolean watermark) {
+             this.options.watermark = watermark;
+             return this;
+         }
+
+         public AudioOptions build() {
+             return this.options;
+         }
+
+     }
+
+ }
diff --git a/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoApi.java b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoApi.java
new file mode 100644
index 0000000000000000000000000000000000000000..1f2e17f7adc947c999d668569ddcd2ef3b28d005
--- /dev/null
+++ b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoApi.java
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2023-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package xyz.thoughtset.viewer.models.zhipuai.api.video;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.Builder;
+import lombok.Data;
+import org.springframework.ai.model.ApiKey;
+import org.springframework.ai.model.NoopApiKey;
+import org.springframework.ai.model.SimpleApiKey;
+import org.springframework.ai.retry.RetryUtils;
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
+import org.springframework.util.Assert;
+import org.springframework.util.StringUtils;
+import org.springframework.web.client.ResponseErrorHandler;
+import org.springframework.web.client.RestClient;
+import org.springframework.web.reactive.function.client.WebClient;
+import reactor.core.publisher.Flux;
+import reactor.core.publisher.Mono;
+import xyz.thoughtset.viewer.common.ai.model.video.ReqInfo;
+import xyz.thoughtset.viewer.models.zhipuai.api.ApiConstants;
+
+import java.util.function.Consumer;
+
+public class VideoApi {
+
+ private final RestClient restClient;
+
+ private final WebClient webClient;
+
+ /**
+ * Create a new audio api.
+ * @param baseUrl api base URL.
+ * @param apiKey OpenAI apiKey.
+ * @param headers the http headers to use.
+ * @param restClientBuilder RestClient builder.
+ * @param webClientBuilder WebClient builder.
+ * @param responseErrorHandler Response error handler.
+ */
+ public VideoApi(String baseUrl, ApiKey apiKey, HttpHeaders headers, RestClient.Builder restClientBuilder,
+ WebClient.Builder webClientBuilder, ResponseErrorHandler responseErrorHandler) {
+ if (!StringUtils.hasText(baseUrl)) {
+ baseUrl = ApiConstants.DEFAULT_BASE_URL;
+ }
+ Consumer authHeaders = h -> h.addAll(HttpHeaders.readOnlyHttpHeaders(headers));
+
+ // @formatter:off
+ this.restClient = restClientBuilder.clone()
+ .baseUrl(baseUrl)
+ .defaultHeaders(authHeaders)
+ .defaultStatusHandler(responseErrorHandler)
+ .defaultRequest(requestHeadersSpec -> {
+ if (!(apiKey instanceof NoopApiKey)) {
+ requestHeadersSpec.header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey.getValue());
+ }
+ })
+ .build();
+
+ this.webClient = webClientBuilder.clone()
+ .baseUrl(baseUrl)
+ .defaultHeaders(authHeaders)
+ .defaultRequest(requestHeadersSpec -> {
+ if (!(apiKey instanceof NoopApiKey)) {
+ requestHeadersSpec.header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey.getValue());
+ }
+ })
+ .build(); // @formatter:on
+ }
+
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ /**
+ * Request to generates audio from the input text.
+ * @param requestBody The request body.
+ * @return Response entity containing the audio binary.
+ */
+ public ResponseEntity createReq(VideoRequest requestBody) {
+ return this.restClient.post().uri(requestBody.reqUrl()).body(requestBody).retrieve().toEntity(VideoResp.class);
+ }
+
+ /**
+ * Streams audio generated from the input text.
+ *
+ * This method sends a POST request to the OpenAI API to generate audio from the
+ * provided text. The audio is streamed back as a Flux of ResponseEntity objects, each
+ * containing a byte array of the audio data.
+ * @param requestBody The request body containing the details for the audio
+ * generation, such as the input text, model, voice, and response format.
+ * @return A Flux of ResponseEntity objects, each containing a byte array of the audio
+ * data.
+ */
+ public Flux> stream(VideoRequest requestBody) {
+
+ return this.webClient.post()
+ .uri(requestBody.reqUrl())
+ .body(Mono.just(requestBody), VideoRequest.class)
+ .accept(MediaType.APPLICATION_OCTET_STREAM)
+ .exchangeToFlux(clientResponse -> {
+ HttpHeaders headers = clientResponse.headers().asHttpHeaders();
+ return clientResponse.bodyToFlux(VideoResp.class)
+ .map(bytes -> ResponseEntity.ok().headers(headers).body(bytes));
+ });
+ }
+
+
+
+ @lombok.Builder
+ @JsonInclude(Include.NON_NULL)
+ public record VideoRequest(
+ @JsonProperty("model") String model,
+ @JsonProperty("prompt") String prompt,
+ @JsonProperty("quality") String quality,
+ @JsonProperty("with_audio") Boolean withAudio,
+ @JsonProperty("watermark_enabled") Boolean watermarkEnabled,
+ @JsonProperty("size") String size,
+ @JsonProperty("fps") Integer fps,
+ @JsonProperty("duration") Integer duration) implements ReqInfo {
+
+ @Override
+ public String reqUrl() {
+ return ApiConstants.VIDEO_URL;
+ }
+
+ }
+
+
+ @JsonInclude(Include.NON_NULL)
+ @JsonIgnoreProperties(ignoreUnknown = true)
+ public record VideoResp(
+ @JsonProperty("model") String model,
+ @JsonProperty("id") String id,
+ @JsonProperty("request_id") String requestId,
+ @JsonProperty("task_status") String taskStatus) {
+
+ }
+
+
+
+ public static final class Builder {
+
+ private String baseUrl = ApiConstants.DEFAULT_BASE_URL;
+
+ private ApiKey apiKey;
+
+ private HttpHeaders headers = new HttpHeaders();
+
+ private RestClient.Builder restClientBuilder = RestClient.builder();
+
+ private WebClient.Builder webClientBuilder = WebClient.builder();
+
+ private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER;
+
+ public Builder baseUrl(String baseUrl) {
+ Assert.hasText(baseUrl, "baseUrl cannot be null or empty");
+ this.baseUrl = baseUrl;
+ return this;
+ }
+
+ public Builder apiKey(ApiKey apiKey) {
+ Assert.notNull(apiKey, "apiKey cannot be null");
+ this.apiKey = apiKey;
+ return this;
+ }
+
+ public Builder apiKey(String simpleApiKey) {
+ Assert.notNull(simpleApiKey, "simpleApiKey cannot be null");
+ this.apiKey = new SimpleApiKey(simpleApiKey);
+ return this;
+ }
+
+ public Builder headers(HttpHeaders headers) {
+ Assert.notNull(headers, "headers cannot be null");
+ this.headers = headers;
+ return this;
+ }
+
+ public Builder restClientBuilder(RestClient.Builder restClientBuilder) {
+ Assert.notNull(restClientBuilder, "restClientBuilder cannot be null");
+ this.restClientBuilder = restClientBuilder;
+ return this;
+ }
+
+ public Builder webClientBuilder(WebClient.Builder webClientBuilder) {
+ Assert.notNull(webClientBuilder, "webClientBuilder cannot be null");
+ this.webClientBuilder = webClientBuilder;
+ return this;
+ }
+
+ public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) {
+ Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null");
+ this.responseErrorHandler = responseErrorHandler;
+ return this;
+ }
+
+ public VideoApi build() {
+ Assert.notNull(this.apiKey, "apiKey must be set");
+ return new VideoApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder,
+ this.webClientBuilder, this.responseErrorHandler);
+ }
+
+ }
+
+}
diff --git a/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoModel.java b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoModel.java
new file mode 100644
index 0000000000000000000000000000000000000000..c3c2b41a27b3b679783512f324ca8b819b37b3f6
--- /dev/null
+++ b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoModel.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2023-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package xyz.thoughtset.viewer.models.zhipuai.api.video;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.audio.tts.*;
+import org.springframework.ai.model.Model;
+import org.springframework.ai.model.StreamingModel;
+import org.springframework.beans.BeanUtils;
+import org.springframework.http.ResponseEntity;
+import org.springframework.retry.support.RetryTemplate;
+import org.springframework.util.Assert;
+import org.springframework.util.StringUtils;
+import reactor.core.publisher.Flux;
+import xyz.thoughtset.viewer.common.ai.model.video.AsyncVideo;
+import xyz.thoughtset.viewer.common.ai.model.video.VideoPrompt;
+
+ /**
+  * Model that submits video generation requests through {@link VideoApi} and wraps
+  * the id of each response in a {@link VideoResponse}.
+  * <p>
+  * Requests are always built from the default options given at construction time; the
+  * prompt text is used only when the default options carry no prompt.
+  */
+ @Slf4j
+ public class VideoModel implements Model<VideoPrompt, VideoResponse>, StreamingModel<VideoPrompt, VideoResponse> {
+
+     /**
+      * The default options used for the video generation requests.
+      */
+     private final VideoOptions defaultOptions;
+
+     /**
+      * The retry template used to retry the video API calls.
+      */
+     private final RetryTemplate retryTemplate;
+
+     /**
+      * Low-level access to the video API.
+      */
+     private final VideoApi videoApi;
+
+     /**
+      * Creates the model.
+      * @param videoApi The VideoApi used to send requests.
+      * @param options The default VideoOptions applied to every request.
+      * @param retryTemplate The retry template.
+      * @throws IllegalArgumentException if any argument is null.
+      */
+     public VideoModel(VideoApi videoApi, VideoOptions options,
+             RetryTemplate retryTemplate) {
+         Assert.notNull(videoApi, "OpenAiAudioApi must not be null");
+         Assert.notNull(options, "OpenAiSpeechOptions must not be null");
+         // Validate the retry template itself; the original re-checked `options` here,
+         // which let a null template through until the first call.
+         Assert.notNull(retryTemplate, "RetryTemplate must not be null");
+         this.videoApi = videoApi;
+         this.defaultOptions = options;
+         this.retryTemplate = retryTemplate;
+     }
+
+     /**
+      * Submits a request for the given text and returns the output of the single
+      * result.
+      * @param text The prompt text.
+      * @return The output of the result of {@link #call(VideoPrompt)}.
+      */
+     public String call(String text) {
+         VideoPrompt prompt = new VideoPrompt(text);
+         return call(prompt).getResult().getOutput();
+     }
+
+     /**
+      * Submits a video generation request with a blocking call, executed through the
+      * retry template.
+      * @param prompt The prompt supplying the text when the default options have none.
+      * @return A response wrapping an {@link AsyncVideo} built from the response id.
+      */
+     @Override
+     public VideoResponse call(VideoPrompt prompt) {
+
+         VideoApi.VideoRequest speechRequest = createRequest(prompt);
+
+         ResponseEntity<VideoApi.VideoResp> respEntity = this.retryTemplate.execute(
+                 (ctx) -> this.videoApi.createReq(speechRequest));
+
+         return new VideoResponse(new AsyncVideo(respEntity.getBody().id()));
+     }
+
+     /**
+      * Submits a video generation request through the reactive API.
+      * @param prompt The prompt supplying the text when the default options have none.
+      * @return A Flux of VideoResponse objects, one per element emitted by the API,
+      * each wrapping an {@link AsyncVideo} built from the element's id.
+      */
+     @Override
+     public Flux<VideoResponse> stream(VideoPrompt prompt) {
+
+         VideoApi.VideoRequest speechRequest = createRequest(prompt);
+
+         // NOTE(review): the retry template wraps only the call that returns the Flux;
+         // errors signalled after subscription are presumably not retried - confirm.
+         Flux<ResponseEntity<VideoApi.VideoResp>> entityFlux = this.retryTemplate.execute(
+                 (ctx) -> this.videoApi.stream(speechRequest));
+
+         return entityFlux.map(entity -> new VideoResponse(new AsyncVideo(entity.getBody().id())));
+     }
+
+     /**
+      * Builds the API request from the default options. The prompt of the default
+      * options wins when it has text; otherwise the instructions of the given prompt
+      * are used.
+      */
+     private VideoApi.VideoRequest createRequest(VideoPrompt prompt) {
+         VideoOptions options = this.defaultOptions;
+
+         String input = StringUtils.hasText(options.getPrompt()) ? options.getPrompt()
+                 : prompt.getInstructions();
+
+         return VideoApi.VideoRequest.builder()
+             .model(options.getModel())
+             .prompt(input)
+             .quality(options.getQuality())
+             .withAudio(options.getWithAudio())
+             .watermarkEnabled(options.getWatermarkEnabled())
+             .size(options.getSize())
+             .fps(options.getFps())
+             .duration(options.getDuration())
+             .build();
+     }
+
+ }
diff --git a/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoOptions.java b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoOptions.java
new file mode 100644
index 0000000000000000000000000000000000000000..5815db6ee87ddba6a7c478607f71d27b44e921f7
--- /dev/null
+++ b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoOptions.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2023-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package xyz.thoughtset.viewer.models.zhipuai.api.video;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.Data;
+import org.springframework.ai.audio.tts.TextToSpeechOptions;
+import org.springframework.ai.model.ModelOptions;
+import xyz.thoughtset.viewer.common.ai.model.video._BaseVideoOptions;
+
+/** Option holder for video generation; Lombok's @Data supplies the accessors and null fields are left out of JSON. */
+@Data
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class VideoOptions implements _BaseVideoOptions {
+ // NOTE(review): field meanings below are inferred from names only; confirm against the video API.
+ protected String model;
+ protected String prompt;
+ protected String quality;
+ protected Boolean withAudio;
+ protected Boolean watermarkEnabled;
+ protected String size;
+ protected Integer fps; // presumably frames per second; TODO confirm
+ protected Integer duration; // unit is not visible here; TODO confirm
+
+}
diff --git a/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoResponse.java b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoResponse.java
new file mode 100644
index 0000000000000000000000000000000000000000..315e18d87e65fce294d285a8b530b14518441d2b
--- /dev/null
+++ b/models/viewer-models-zhipuai/src/main/java/xyz/thoughtset/viewer/models/zhipuai/api/video/VideoResponse.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2025-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package xyz.thoughtset.viewer.models.zhipuai.api.video;
+
+import org.springframework.ai.audio.tts.TextToSpeechResponse;
+import org.springframework.ai.model.ModelResponse;
+import xyz.thoughtset.viewer.common.ai.model.video.AsyncVideo;
+import xyz.thoughtset.viewer.common.ai.model.video.VideoResponseMetadata;
+
+import java.util.List;
+import java.util.Objects;
+
+
+/** Response of a video generation call: a single AsyncVideo result plus optional metadata. */
+public class VideoResponse implements ModelResponse<AsyncVideo> {
+
+    private final AsyncVideo result;
+
+    private final VideoResponseMetadata videoResponseMetadata;
+
+    /** Creates a response that carries no metadata. */
+    public VideoResponse(AsyncVideo result) {
+        this(result, null);
+    }
+
+    /** Creates a response from a result and its metadata; the metadata may be null. */
+    public VideoResponse(AsyncVideo result, VideoResponseMetadata videoResponseMetadata) {
+        this.result = result;
+        this.videoResponseMetadata = videoResponseMetadata;
+    }
+
+    /** Returns the single result as an immutable list, or an empty list when there is none. */
+    @Override
+    public List<AsyncVideo> getResults() {
+        // List.of rejects null elements, so a missing result must not reach it.
+        return (this.result != null) ? List.of(this.result) : List.of();
+    }
+
+    @Override
+    public AsyncVideo getResult() {
+        return this.result;
+    }
+
+    @Override
+    public VideoResponseMetadata getMetadata() {
+        return this.videoResponseMetadata;
+    }
+
+    /** Two responses are equal when their results are equal; metadata is not compared. */
+    @Override
+    public boolean equals(Object o) {
+        return this == o || (o instanceof VideoResponse that && Objects.equals(this.result, that.result));
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(this.result);
+    }
+
+    @Override
+    public String toString() {
+        return "VideoResponse{" + "results=" + this.result + '}';
+    }
+}
diff --git a/modules/viewer-modules-ds/viewer-modules-ds-core/src/main/java/xyz/thoughtset/viewer/modules/ds/core/service/DsConfigServiceImpl.java b/modules/viewer-modules-ds/viewer-modules-ds-core/src/main/java/xyz/thoughtset/viewer/modules/ds/core/service/DsConfigServiceImpl.java
index 7651486dfd8a0e04f3355387c3695de88bc5766f..e2d5678f0fa9b62f1b43d56cb59dada85a5b2c33 100644
--- a/modules/viewer-modules-ds/viewer-modules-ds-core/src/main/java/xyz/thoughtset/viewer/modules/ds/core/service/DsConfigServiceImpl.java
+++ b/modules/viewer-modules-ds/viewer-modules-ds-core/src/main/java/xyz/thoughtset/viewer/modules/ds/core/service/DsConfigServiceImpl.java
@@ -12,7 +12,6 @@ import xyz.thoughtset.viewer.modules.ds.core.factory.ConnectFactory;
import javax.sql.DataSource;
import java.util.LinkedHashMap;
import java.util.Objects;
-import java.util.concurrent.ConcurrentHashMap;
@Service
@Transactional
@@ -27,7 +26,7 @@ public class DsConfigServiceImpl extends BaseServiceImpl
Object settings = baseMap.remove(settingsKey);
DsConfig o = convertValue(baseMap);
if (Objects.nonNull(settings)){
- o.setOtherSettings(mapper.writeValueAsString(settings));
+ o.setOtherSettings(objectMapper.writeValueAsString(settings));
}
saveOrUpdate(o);
return o;
@@ -51,7 +50,7 @@ public class DsConfigServiceImpl extends BaseServiceImpl
// if(StringUtils.hasText(settingsStr)){
// LinkedHashMap settings = null;
// try {
-// settings = mapper.readValue(settingsStr, LinkedHashMap.class);
+// settings = objectMapper.readValue(settingsStr, LinkedHashMap.class);
// } catch (JsonProcessingException e) {
// throw new RuntimeException(e);
// }
diff --git a/modules/viewer-modules-ds/viewer-modules-ds-core/src/main/java/xyz/thoughtset/viewer/modules/ds/core/service/LinkerConfigServiceImpl.java b/modules/viewer-modules-ds/viewer-modules-ds-core/src/main/java/xyz/thoughtset/viewer/modules/ds/core/service/LinkerConfigServiceImpl.java
index 50e2c24f771c9d3a03487c1f1b59d344840dad3d..0c03e6f89a90126fe489688585e3831331ffc676 100644
--- a/modules/viewer-modules-ds/viewer-modules-ds-core/src/main/java/xyz/thoughtset/viewer/modules/ds/core/service/LinkerConfigServiceImpl.java
+++ b/modules/viewer-modules-ds/viewer-modules-ds-core/src/main/java/xyz/thoughtset/viewer/modules/ds/core/service/LinkerConfigServiceImpl.java
@@ -21,7 +21,7 @@ public class LinkerConfigServiceImpl extends BaseServiceImpl params;
@TableField(exist = false)
private Map> groupParams;
diff --git a/modules/viewer-modules-step/src/main/java/xyz/thoughtset/viewer/modules/step/service/BlockInfoServiceImpl.java b/modules/viewer-modules-step/src/main/java/xyz/thoughtset/viewer/modules/step/service/BlockInfoServiceImpl.java
index fb43667cd4a3c418d19654479b21f79ffcc997d1..c1e945289d624a92eaf9f4a8a301a4a13175cf9a 100644
--- a/modules/viewer-modules-step/src/main/java/xyz/thoughtset/viewer/modules/step/service/BlockInfoServiceImpl.java
+++ b/modules/viewer-modules-step/src/main/java/xyz/thoughtset/viewer/modules/step/service/BlockInfoServiceImpl.java
@@ -1,7 +1,6 @@
package xyz.thoughtset.viewer.modules.step.service;
import cn.zhxu.bs.BeanSearcher;
-import cn.zhxu.bs.MapSearcher;
import cn.zhxu.bs.util.MapUtils;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.fasterxml.jackson.databind.JavaType;
@@ -18,7 +17,6 @@ import xyz.thoughtset.viewer.modules.step.entity.BlockTypeEnum;
import xyz.thoughtset.viewer.modules.step.entity.block.BlockBodyEle;
import xyz.thoughtset.viewer.modules.step.entity.block.BlockInfo;
import xyz.thoughtset.viewer.modules.step.entity.block.EleParam;
-import xyz.thoughtset.viewer.modules.step.entity.vo.BodyEleView;
import java.io.Serializable;
import java.util.*;
@@ -36,7 +34,7 @@ public class BlockInfoServiceImpl extends BaseServiceImpl4.11.0
0.2.25
3.2.0
- 1.1.0
- 0.16.0
+ 1.1.2
+ 0.17.0