Hello,
I am using ZWO ASI290C.
1) Adjusting USB bandwidth didn't help.
2) I am not processing the data at all now, just wanted to get them from camera
3) I tried USB 3.0 and 3.1 (Jetson Nano, Jetson Xavier NX)
I suspect something is wrong in the API library, specifically in the way it handles USB bulk transfers: either it does not allocate the USB buffers properly, or it performs a lot of memory copies.
I am also afraid that fixing this will not be possible without API-incompatible changes.
I tried to patch the library in order to avoid calling the following API functions:
**
ASIStartVideoCapture
ASIStopVideoCapture
ASIGetVideoData
**
and I handled the video data transfer on my own using libusb directly. (I checked what data is sent over USB when ASIStartVideoCapture is called, replayed it on the USB bus using libusb_control_transfer calls, and then read the stream from the camera using bulk transfers.)
I used 4 bulk transfers, each with a buffer as big as a single frame (W x H x BPP / 8). In the loop I just called libUSB_handle_events_timeout, and in the transfer callback I just copied the data to the final video back buffer.
I only checked whether I am getting data from the camera and what the CPU usage is. I am getting as much data as expected (the bitrate matches the camera settings), and the CPU usage is 0.3% to 10% instead of 50% or 100%. For 1936x1096x8 video I am receiving data from the camera at 2885 Mbit/s, which equals 170 FPS (one frame is about 16.97 Mbit), and that is correct. Just to add, the Jetson Nano is capable of a 5 Gbit/s and the Jetson Xavier NX of a 10 Gbit/s data stream over USB (I tested it and it matches).
I haven't checked the data visually yet, but I assume I obtained Bayer-pattern RAW data in 8- or 16-bit format.
I am just guessing now:
There is some design flaw, design mistake, or code bug in the handling of the bulk transfers or in the USB bulk transfer buffer allocation, or some expensive buffer copying is done internally in the library.
Can you please review the code in which you initialize the USB bulk transfers and process the data? I would suggest allocating, filling and submitting more bulk transfers (I tried 4 with no problem) and processing them accordingly in the transfer callback.
Also, please avoid copying the video data in memory more times than necessary. I guess you copy the data from your internal buffer to the buffer allocated by the application in the ASIGetVideoData function call. It would be much better to use double/triple/quad buffering internally in the library, ideally configurable from the application. The "back" buffer should be filled directly from the bulk transfer handler, while the other buffer, holding the previously captured frame, should be available to the application without any data copying. It would be even better if the back buffers could be allocated by the application, because other technologies may be in the chain (e.g. CUDA); the application could then allocate each buffer with the appropriate function, and only pointers to the buffers would be passed to the library. Additionally, make sure you use libusb_dev_mem_alloc for bulk transfer buffer allocation so the driver can access the memory directly and use DMA.
Regarding open-sourcing the library, I understand, but please improve the library's performance and fix the high CPU usage issue.
Below are the sources I used to measure the bitrate. They require libASICamera2.so to be patched and a custom libusb wrapper to be compiled and installed. I have no documentation on how to do that, so it will be hard for you to get it working; it is just for inspiration.
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <sched.h>
#include <string.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <npp.h>
#include <zwo/ASICamera2.h>
#include "lib-ri/libUSB.h"
/*
 * Shutdown request flag. Written from the SIGINT handler and polled by the
 * main loop, so it must be a volatile sig_atomic_t to be safely accessed from
 * both contexts.
 */
volatile sig_atomic_t ctrlc = 0;

/*
 * SIGINT handler: announces the interruption and raises the shutdown flag.
 * Any other signal number is ignored.
 *
 * Only async-signal-safe operations are used here: write(2) instead of
 * printf(), which may deadlock or corrupt stdio state when it interrupts
 * another stdio call.
 */
void signal_handler(int sig_num) {
    static const char msg[] = "Interrupted. Exiting\n";

    if (sig_num != SIGINT) return;

    /* Best effort: there is nothing useful to do if the write fails. */
    ssize_t written = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
    (void)written;

    ctrlc = 1;
}
/* Buffer pair A: raw frame data copied in by the transfer callback, and the
 * destination of the CFA-to-RGB conversion for that frame. */
void *cudaBufferA, *cudaBufferA_out;
/* Buffer pair B: the alternate pair, selected when curbuf is non-zero. */
void *cudaBufferB, *cudaBufferB_out;

/* Image size and region of interest handed to the NPP conversion call. */
NppiSize cudaBufferSize;
NppiRect cudaBufferRect;

/* Which buffer pair is currently being filled (0 = A, otherwise B). */
uint8_t curbuf = 0;
/* Byte offset inside the current raw buffer where the next chunk is written. */
ulong curbufpos = 0;

/* Frames completed since the last statistics report (reset by main). */
int fps = 0;
/* Total number of frames completed since start. */
int frames = 0;
/* Size of one raw frame in bytes (assigned by main). */
int framesize = 0;
/* Bytes received towards the frame that is currently being assembled. */
int totalrecv = 0;
/* Bits received since the last statistics report (reset by main). */
long bitrate = 0;
void tranfercb(struct libusb_transfer * transfer) {
uint8_t * cudaBuffer = curbuf == 0 ? cudaBufferA : cudaBufferB;
uint8_t * cudaBufferOut = curbuf == 0 ? cudaBufferA_out : cudaBufferB_out;
if (transfer->status == LIBUSB_TRANSFER_COMPLETED) {
uint8_t * cudaBufferOfs = cudaBuffer + curbufpos;
cudaMemcpy(cudaBufferOfs, transfer->buffer, transfer->actual_length, cudaMemcpyHostToDevice);
curbufpos += transfer->actual_length;
bitrate += transfer->actual_length * 8;
totalrecv += transfer->actual_length;
if (totalrecv >= framesize) {
totalrecv -= framesize;
frames++;
fps++;
curbufpos = 0;
curbuf ^= 1;
NppStatus status = nppiCFAToRGB_8u_C1C3R(cudaBuffer, cudaBufferSize.width, cudaBufferSize, cudaBufferRect, cudaBufferOut, cudaBufferSize.width * 3, NPPI_BAYER_BGGR, NPPI_INTER_UNDEFINED);
if (status != 0) {
printf("nppiCFAToRGB_8u_C1C3R failed with error code %i\n", status);
}
}
} else {
printf("transfrer error: %i", transfer->status);
}
libUSB_submit_transfer(transfer);
}
int main(int argc, char *argv[]) {
signal(SIGINT, signal_handler);
int result;
printf("Searching for available ZWO cameras...\n");
int camCount = ASIGetNumOfConnectedCameras();
if (camCount <= 0) {
printf("Failed to find ZWO cameras...\n");
exit(1);
} else {
printf("%i ZWO cameras found.\n", camCount);
}
ASI_CAMERA_INFO camInfo;
for (int i = 0; i < camCount; i++) {
result = ASIGetCameraProperty(&camInfo, i);
if (result != 0) {
printf("Failed to obtain info about camera at index #%i", i);
exit(2);
}
printf(
"Camera found (%i): [%i] %s (%ld x %ld x %i)\n",
i,
camInfo.CameraID,
camInfo.Name,
camInfo.MaxWidth,
camInfo.MaxHeight,
camInfo.BitDepth
);
}
result = ASIOpenCamera(0);
if (result != 0) {
printf("Failed to open camera (%i)!\n", result);
exit(3);
}
printf("Claimed dev = %p, handle %p\n", opened_dev, opened_dev_handle);
result = ASIInitCamera(0);
if (result != 0) {
printf("Failed to initialize camera!\n");
ASICloseCamera(0);
exit(4);
}
// int width = camInfo.MaxWidth;
// int height = camInfo.MaxHeight;
int bpp;
int bin = 1;
int width = 1936 / bin;
int height = 1096 / bin;
int img_format = ASI_IMG_RAW8;
//int img_format = ASI_IMG_RAW16;
//int img_format = ASI_IMG_RGB24;
switch (img_format) {
case ASI_IMG_RAW8:
case ASI_IMG_Y8:
bpp = 8;
break;
case ASI_IMG_RAW16:
bpp = 16;
break;
case ASI_IMG_RGB24:
bpp = 24;
break;
default:
printf("Unknown image format %i!\n", img_format);
ASICloseCamera(0);
exit(5);
}
result = ASISetROIFormat(0, width, height, bin, img_format);
if (result != 0) {
printf("Failed to set ROI format!\n");
ASICloseCamera(0);
exit(6);
}
result = ASISetControlValue(0, ASI_EXPOSURE, 1000, ASI_FALSE);
if (result != 0) {
printf("Failed to set Exposure time!\n");
ASICloseCamera(0);
exit(6);
}
result = ASISetControlValue(0, ASI_HIGH_SPEED_MODE, ASI_TRUE, ASI_FALSE);
if (result != 0) {
printf("Failed to set Exposure time!\n");
ASICloseCamera(0);
exit(6);
}
printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
/*
result = ASIStartVideoCapture(0);
if (result != 0) {
printf("Failed to start capture!\n");
ASICloseCamera(0);
exit(7);
}
*/
char ch = 31;
libUSB_reset_device(opened_dev);
libUSB_control_transfer(opened_dev, 64, 170, 0, 0, NULL, 0, 200);
libUSB_control_transfer(opened_dev, 192, 188, 0, 0, &ch, 1, 500);
libUSB_control_transfer(opened_dev, 64, 189, 0, 49, NULL, 0, 500);
libUSB_control_transfer(opened_dev, 64, 182, 12288, 1, NULL, 0, 500);
libUSB_control_transfer(opened_dev, 64, 169, 0, 0, NULL, 0, 200);
libUSB_control_transfer(opened_dev, 64, 182, 12288, 0, NULL, 0, 500);
libUSB_control_transfer(opened_dev, 192, 188, 0, 0, &ch, 1, 500);
libUSB_control_transfer(opened_dev, 64, 189, 0, 33, NULL, 0, 500);
struct libusb_transfer * transfer1;
struct libusb_transfer * transfer2;
struct libusb_transfer * transfer3;
struct libusb_transfer * transfer4;
transfer1 = libUSB_alloc_transfer(0);
transfer2 = libUSB_alloc_transfer(0);
transfer3 = libUSB_alloc_transfer(0);
transfer4 = libUSB_alloc_transfer(0);
unsigned char * buf_dev_mem1 = libusb_dev_mem_alloc(opened_dev, 1936 * 1096);
unsigned char * buf_dev_mem2 = libusb_dev_mem_alloc(opened_dev, 1936 * 1096);
unsigned char * buf_dev_mem3 = libusb_dev_mem_alloc(opened_dev, 1936 * 1096);
unsigned char * buf_dev_mem4 = libusb_dev_mem_alloc(opened_dev, 1936 * 1096);
libusb_fill_bulk_transfer(transfer1, opened_dev, 129, buf_dev_mem1, 1936 * 1096, &tranfercb, NULL, -1);
libusb_fill_bulk_transfer(transfer2, opened_dev, 129, buf_dev_mem2, 1936 * 1096, &tranfercb, NULL, -1);
libusb_fill_bulk_transfer(transfer3, opened_dev, 129, buf_dev_mem2, 1936 * 1096, &tranfercb, NULL, -1);
libusb_fill_bulk_transfer(transfer4, opened_dev, 129, buf_dev_mem2, 1936 * 1096, &tranfercb, NULL, -1);
libUSB_submit_transfer(transfer1);
libUSB_submit_transfer(transfer2);
libUSB_submit_transfer(transfer3);
libUSB_submit_transfer(transfer4);
long frame_bits = (long)width * (long)height * bpp;
int bufsize = width * height * (bpp / 8);
framesize = bufsize;
unsigned char* buf = malloc(bufsize);
unsigned long start = time(NULL);
printf ("Using format %i x %i x %i (bin %i)\n", width, height, bpp, bin);
printf ("Single frame size: %.2f Mbit (%.2f MB)\n", ((float)frame_bits / 1000 / 1000), ((float)frame_bits / 8 / 1024 / 1024));
struct timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 0;
cudaError_t err = cudaSuccess;
err = cudaMalloc(&cudaBufferA, bufsize);
if (err != cudaSuccess) {
printf("Failed to allocate CUDA buffer A of size %i (err: %i)!\n", bufsize, err);
}
err = cudaMalloc(&cudaBufferB, bufsize);
if (err != cudaSuccess) {
printf("Failed to allocate CUDA buffer B of size %i (err: %i)!\n", bufsize, err);
}
err = cudaMalloc(&cudaBufferA_out, width * height * 3);
if (err != cudaSuccess) {
printf("Failed to allocate CUDA output buffer A of size %i (err: %i)!\n", bufsize, err);
}
err = cudaMalloc(&cudaBufferB_out, width * height * 3);
if (err != cudaSuccess) {
printf("Failed to allocate CUDA output buffer B of size %i (err: %i)!\n", bufsize, err);
}
cudaBufferSize.width = width;
cudaBufferSize.height = height;
cudaBufferRect.x = 0;
cudaBufferRect.y = 0;
cudaBufferRect.width = width;
cudaBufferRect.height = height;
while (!ctrlc) {
result = 0;
// result = ASIGetVideoData(0, buf, bufsize, -1);
libUSB_handle_events_timeout(context, &tv);
if (result != 0) continue;
if (time(NULL) - start > 0) {
int dropped = 0;
float bitr = bitrate / 1000000;
ASIGetDroppedFrames(0, &dropped);
printf("%i frames (%i fps), dropped: %i, bitrate = %.2f mbit/s \r", frames, fps, dropped, bitr);
fflush(stdout);
start = time(NULL);
fps = 0;
bitrate = 0;
}
usleep(10000);
}
free(buf);
/*result = ASIStopVideoCapture(0);
if (result != 0) {
printf("Failed to stop capture!\n");
ASICloseCamera(0);
exit(8);
}*/
result = ASICloseCamera(0);
if (result != 0) {
printf("Failed to close camera!\n");
exit(9);
}
}
Thanks