Working with datasets (API)

Creating a dataset

ℹ️

Note

Datasets can't be deleted via the SDK or the API, as this is a significant and irreversible operation. Please use our web-app to delete datasets.

You can use the API to create a dataset using the example below. However, you need to create a public-private key pair for Encord first.

You'll need to choose where your data will be hosted by specifying the type of payload in the example below. This will determine the type of dataset being created.

| Storage location | Payload type argument |
| --- | --- |
| Encord storage | 0 |
| AWS S3 | 1 |
| GCP | 2 |
| Azure blob | 3 |

const crypto = require('crypto');
const sshpk = require('sshpk');

/**
 * Builds the `Authorization` header value for a request signed with a private key.
 *
 * The request body is hashed with SHA-256, that digest is signed, and the result is
 * returned as `<public key hex>:<signature hex>`.
 *
 * @param {string} data - Request body to sign (the examples pass a JSON string).
 * @param {string} privateKey - Private key text, parsed by sshpk using the 'openssh' format.
 * @returns {string} Hex of the first public-key part and hex of the first signature part, joined by ':'.
 */
const generateAuthHeader = (data, privateKey) => {
    const parsedKey = sshpk.parsePrivateKey(privateKey, 'openssh');

    // The signer is fed the SHA-256 digest of the body rather than the body itself.
    const bodyDigest = crypto.createHash('sha256').update(data).digest();
    const signer = parsedKey.createSign('sha512');
    signer.update(bodyDigest);
    const signatureHex = signer.sign().parts[0].data.toString('hex');

    const publicKeyHex = parsedKey.toPublic().parts[0].data.toString('hex');

    return `${publicKeyHex}:${signatureHex}`;
};

var axios = require('axios');
// Payload describing the dataset to create; replace the <...> placeholders before running.
const datasetPayload = {
  title: '<Dataset title>',
  type: '<0: CORD Storage, 1: AWS, 2: GCP, 3: AZURE>',
  description: '<Dataset description>',
};

const requestBody = JSON.stringify({
  query_type: 'dataset',
  query_method: 'POST',
  values: { uid: null, payload: datasetPayload },
});

// The Authorization header is computed from the exact serialized body that is sent.
const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public/user',
  headers: {
    'Content-Type': 'application/json',
    Authorization: generateAuthHeader(requestBody, '<Private key>'),
    Accept: 'application/json',
  },
  data: requestBody,
};

axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });
# Create a dataset through the user endpoint.
# Replace <auth_header> and the <...> payload placeholders before running.
curl -L -X POST 'https://api.encord.com/public/user' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: <auth_header>' \
  -H 'Accept: application/json' \
  --data-raw '{
    "query_type": "dataset",
    "query_method":"POST",
    "values": {
      "uid": null,
      "payload": {
          "title": <Dataset title>,
          "type": <0: CORD Storage, 1: AWS, 2: GCP, 3: AZURE>,
          "description": <Dataset description>
      }
    }
}'

Creating a dataset API key

Create a dataset API key via our API, which will be required to interact with the dataset.

Provide the dataset_hash to uniquely identify a dataset. The ResourceID of a dataset is the same as its dataset_hash.

ℹ️

Note

Creating a dataset API key is only possible for admins of a dataset.

const crypto = require('crypto');
const sshpk = require('sshpk');

/**
 * Builds the `Authorization` header value for a request signed with a private key.
 *
 * The request body is hashed with SHA-256, that digest is signed, and the result is
 * returned as `<public key hex>:<signature hex>`.
 *
 * @param {string} data - Request body to sign (the examples pass a JSON string).
 * @param {string} privateKey - Private key text, parsed by sshpk using the 'openssh' format.
 * @returns {string} Hex of the first public-key part and hex of the first signature part, joined by ':'.
 */
const generateAuthHeader = (data, privateKey) => {
    const parsedKey = sshpk.parsePrivateKey(privateKey, 'openssh');

    // The signer is fed the SHA-256 digest of the body rather than the body itself.
    const bodyDigest = crypto.createHash('sha256').update(data).digest();
    const signer = parsedKey.createSign('sha512');
    signer.update(bodyDigest);
    const signatureHex = signer.sign().parts[0].data.toString('hex');

    const publicKeyHex = parsedKey.toPublic().parts[0].data.toString('hex');

    return `${publicKeyHex}:${signatureHex}`;
};


var axios = require('axios');
// Payload for creating a dataset API key; replace the <...> placeholders before running.
const apiKeyPayload = {
  dataset_hash: '<dataset_id>',
  title: '<Dataset title>',
  // Scopes are sent as a JSON array of scope names, not as a string containing JSON text.
  scopes: ['dataset.read', 'dataset.write'],
};

const requestBody = JSON.stringify({
  query_type: 'datasetapikey',
  query_method: 'POST',
  values: { uid: null, payload: apiKeyPayload },
});

// The Authorization header is computed from the exact serialized body that is sent.
const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public/user',
  headers: {
    'Content-Type': 'application/json',
    Authorization: generateAuthHeader(requestBody, '<Private key>'),
    Accept: 'application/json',
  },
  data: requestBody,
};

axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });
# Create an API key for a dataset through the user endpoint.
# Replace <auth_header> and the <...> payload placeholders before running.
curl -L -X POST 'https://api.encord.com/public/user' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: <auth_header>' \
  -H 'Accept: application/json' \
  --data-raw '{
    "query_type": "datasetapikey",
    "query_method":"POST",
    "values": {
      "uid": null,
      "payload": {
          "dataset_hash": <dataset_id>,
          "title": <Dataset title>,
          "scopes": <List of API key scopes>
      }
    }
}'

Fetching dataset API keys

Fetch an API key for an existing dataset via our API:

You need to provide the dataset_hash which uniquely identifies a dataset. The ResourceID of a dataset is the same as its dataset_hash.

ℹ️

Note

Fetching dataset API keys is only possible for admins of a dataset.

Equivalently using NodeJS with Axios:

const crypto = require('crypto');
const sshpk = require('sshpk');

/**
 * Builds the `Authorization` header value for a request signed with a private key.
 *
 * The request body is hashed with SHA-256, that digest is signed, and the result is
 * returned as `<public key hex>:<signature hex>`.
 *
 * @param {string} data - Request body to sign (the examples pass a JSON string).
 * @param {string} privateKey - Private key text, parsed by sshpk using the 'openssh' format.
 * @returns {string} Hex of the first public-key part and hex of the first signature part, joined by ':'.
 */
const generateAuthHeader = (data, privateKey) => {
    const parsedKey = sshpk.parsePrivateKey(privateKey, 'openssh');

    // The signer is fed the SHA-256 digest of the body rather than the body itself.
    const bodyDigest = crypto.createHash('sha256').update(data).digest();
    const signer = parsedKey.createSign('sha512');
    signer.update(bodyDigest);
    const signatureHex = signer.sign().parts[0].data.toString('hex');

    const publicKeyHex = parsedKey.toPublic().parts[0].data.toString('hex');

    return `${publicKeyHex}:${signatureHex}`;
};


var axios = require('axios');
// Request body asking for the API keys of one dataset; replace <dataset_id> before running.
const requestBody = JSON.stringify({
  query_type: 'datasetapikey',
  query_method: 'GET',
  values: {
    uid: null,
    payload: { dataset_hash: '<dataset_id>' },
  },
});

// The Authorization header is computed from the exact serialized body that is sent.
const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public/user',
  headers: {
    'Content-Type': 'application/json',
    Authorization: generateAuthHeader(requestBody, '<Private key>'),
    Accept: 'application/json',
  },
  data: requestBody,
};

axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });
# Fetch the API keys of a dataset through the user endpoint.
# Replace <auth_header> and <dataset_id> before running. The JSON body is wrapped in
# single quotes for the shell, so string values inside it use double quotes.
curl --location --request POST 'https://api.encord.com/public/user' \
--header 'Content-Type: application/json' \
--header 'Authorization: <auth_header>' \
--header 'Accept: application/json' \
--data-raw '{
    "query_type": "datasetapikey",
    "query_method":"GET",
    "values": {
      "uid": null,
      "payload": {
          "dataset_hash": "<dataset_id>"
      }
    }
}'

Fetching dataset information

Fetch information associated with a given dataset.


var axios = require('axios');
// Request body for reading the dataset identified by the ResourceID header.
const requestBody = JSON.stringify({
  query_type: 'dataset',
  query_method: 'GET',
  values: { uid: null, payload: null },
});

// Replace <dataset_id> and <dataset_api_key> before running.
const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public',
  headers: {
    'Content-Type': 'application/json',
    ResourceID: '<dataset_id>',
    Authorization: '<dataset_api_key>',
    Accept: 'application/json',
  },
  data: requestBody,
};

axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });


# Fetch information about a dataset.
# Replace <dataset_id> and <dataset_api_key> before running.
curl -L -X POST 'https://api.encord.com/public' \
  -H 'Content-Type: application/json' \
  -H 'ResourceID: <dataset_id>' \
  -H 'Authorization: <dataset_api_key>' \
  -H 'Accept: application/json' \
  --data-raw '{
    "query_type": "dataset",
    "query_method": "GET",
    "values": {
        "uid": null,
        "payload": null
    }
}'

Adding data

Adding data to Encord-hosted storage

Uploading videos

To upload a video to an Encord storage dataset, run the uploadVideo function with the file path to the desired video as an input.

var axios = require('axios');
var fs = require('fs');
var path = require('path');

/**
 * Uploads a local video file to a dataset: requests a signed URL for the file,
 * then hands the file and the signed-URL data to `uploadToSignedUrl`.
 *
 * Errors from either step are logged with `console.log` and not rethrown.
 *
 * @param {string} filePath - Path of the video file to upload.
 * @param {string} datasetId - Dataset ID, forwarded to the request helpers.
 * @param {string} datasetApiKey - Dataset API key, forwarded to the request helpers.
 * @returns {Promise<void>} Settles after the upload step has completed or failed.
 */
const uploadVideo = async (filePath, datasetId, datasetApiKey) => {
    try {
        // GET signed url
        const signedVideoUrl = await getSignedVideoUrl(filePath, datasetId, datasetApiKey);
        const signedUrlData = signedVideoUrl.response;
        const { signed_url } = signedUrlData;

        // Upload to signed url. Awaited so that a failed upload is handled by the
        // catch below and the returned promise only settles once the upload is done.
        await uploadToSignedUrl(filePath, signed_url, signedUrlData, datasetId, datasetApiKey);
    } catch (e) {
        console.log('Error', e);
    }
};

/**
 * Requests a signed upload URL for a video, using the file's base name as the uid.
 *
 * @param {string} fileName - Path or name of the video; only its base name is sent.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
const getSignedVideoUrl = async (fileName, datasetId, datasetApiKey) => {
    const requestBody = JSON.stringify({
        query_type: 'signedvideourl',
        query_method: 'GET',
        values: { uid: path.basename(fileName), payload: null },
    });

    const { data: responseBody } = await axios({
        method: 'post',
        url: 'https://api.encord.com/public',
        headers: {
            'Content-Type': 'application/json',
            ResourceID: datasetId,
            Authorization: datasetApiKey,
            Accept: 'application/json',
        },
        data: requestBody,
    });

    return responseBody;
};

/**
 * Uploads a video file to a signed URL, then sends a `video` PUT query carrying
 * the signed-URL data to the API.
 *
 * @param {string} filePath - Path of the file whose bytes are uploaded.
 * @param {string} signedUrl - URL the file is PUT to.
 * @param {object} signedUrlData - Signed-URL record; its `data_hash` is used as the uid and the whole record as the payload.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the second (API) response.
 */
const uploadToSignedUrl = async (filePath, signedUrl, signedUrlData, datasetId, datasetApiKey) => {
    // Step 1: PUT the file contents to the signed URL.
    await axios({
        method: 'put',
        url: signedUrl,
        headers: {
            'Content-Type': 'application/octet-stream',
        },
        data: fs.readFileSync(filePath),
        maxContentLength: Infinity,
        maxBodyLength: Infinity,
    });

    // Step 2: send the video query with the signed-URL record.
    const registerBody = JSON.stringify({
        query_type: 'video',
        query_method: 'PUT',
        values: {
            uid: signedUrlData.data_hash,
            payload: signedUrlData,
        },
    });

    const { data: registerReply } = await axios({
        method: 'post',
        url: 'https://api.encord.com/public',
        headers: {
            'Content-Type': 'application/json',
            ResourceID: datasetId,
            Authorization: datasetApiKey,
            Accept: 'application/json',
        },
        data: registerBody,
    });

    return registerReply;
};

// Example invocation: replace the dataset ID, API key and file path with your own values.
const datasetId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee';
const datasetApiKey = 'lCuoabcdefabcdefabcdefabcdefabcdefabc-jlan8';
const videoPath = '/Users/name/Desktop/example_video.mp4';

uploadVideo(videoPath, datasetId, datasetApiKey);

Uploading single images

To upload an image to an Encord storage dataset, run the uploadImage function with the file path to the desired image as an input.

ℹ️

Note

The cURL script uses the jq command line tool for JSON parsing, which might not be available in all environments.

var axios = require("axios");
var fs = require("fs");
var path = require("path");

/**
 * Uploads a single local image to a dataset: requests a signed URL, uploads the
 * file through `uploadToSignedUrl`, and returns the image's data hash.
 *
 * Errors are logged with `console.log` and not rethrown; in that case the
 * returned promise resolves to `undefined`.
 *
 * @param {string} filePath - Path of the image file to upload.
 * @param {string} datasetId - Dataset ID, forwarded to the request helpers.
 * @param {string} datasetApiKey - Dataset API key, forwarded to the request helpers.
 * @returns {Promise<*>} The `data_hash` of the first signed-URL entry, or `undefined` on error.
 */
const uploadImage = async (filePath, datasetId, datasetApiKey) => {
  try {
    // Only the first entry of the signed-URL response is used.
    const signedUrlReply = await getSignedUrl(filePath, datasetId, datasetApiKey);
    const imageUrlData = signedUrlReply.response[0];

    await uploadToSignedUrl(filePath, imageUrlData.signed_url, imageUrlData, datasetId, datasetApiKey);

    return imageUrlData.data_hash;
  } catch (e) {
    console.log("Error", e);
  }
};

/**
 * Requests signed upload URL data for an image, using the file's base name as the uid.
 *
 * @param {string} fileName - Path or name of the image; only its base name is sent.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
const getSignedUrl = async (fileName, datasetId, datasetApiKey) => {
  const requestBody = JSON.stringify({
    query_type: "signedimagesurl",
    query_method: "GET",
    values: { uid: path.basename(fileName), payload: null },
  });

  const { data: responseBody } = await axios({
    method: "post",
    url: "https://api.encord.com/public",
    headers: {
      "Content-Type": "application/json",
      ResourceID: datasetId,
      Authorization: datasetApiKey,
      Accept: "application/json",
    },
    data: requestBody,
  });

  return responseBody;
};

/**
 * Uploads an image file to a signed URL, then sends a `singleimage` POST query
 * carrying the signed-URL data to the API.
 *
 * @param {string} filePath - Path of the file whose bytes are uploaded.
 * @param {string} signedUrl - URL the file is PUT to.
 * @param {object} signedUrlData - Signed-URL record; its `data_hash` is used as the uid and the whole record as the payload.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the second (API) response.
 */
const uploadToSignedUrl = async (filePath, signedUrl, signedUrlData, datasetId, datasetApiKey) => {
  // Step 1: PUT the file contents to the signed URL.
  await axios({
    method: "put",
    url: signedUrl,
    headers: { "Content-Type": "application/octet-stream" },
    data: fs.readFileSync(filePath),
    maxContentLength: Infinity,
    maxBodyLength: Infinity,
  });

  // Step 2: send the single-image query with the signed-URL record.
  const attachBody = JSON.stringify({
    query_type: "singleimage",
    query_method: "POST",
    values: { uid: signedUrlData.data_hash, payload: signedUrlData },
  });

  const { data: attachReply } = await axios({
    method: "post",
    url: "https://api.encord.com/public",
    headers: {
      "Content-Type": "application/json",
      ResourceID: datasetId,
      Authorization: datasetApiKey,
      Accept: "application/json",
    },
    data: attachBody,
  });

  return attachReply;
};

// Example invocation: replace the dataset ID, API key and file path with your own values.
const datasetId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee';
const datasetApiKey = 'lCuoabcdefabcdefabcdefabcdefabcdefabc-jlan8';
const imagePath = '/Users/name/Desktop/example.jpg';

// Print the data hash that uploadImage resolves to.
uploadImage(imagePath, datasetId, datasetApiKey).then((dataHash) => {
  console.log(dataHash);
});


# Uploads a single image to an Encord-hosted dataset with curl and jq:
# 1) request a signed URL, 2) PUT the file to it, 3) attach the image to the dataset.

datasetID='<dataset_id>'  # Dataset ID
datasetKEY='<dataset_api_key>'  # API key
fileName='<file_path>'

# === GET SIGNED URL TO UPLOAD TO === #
# The uid is the file's base name, matching the NodeJS example (path.basename).
uid=$(basename "${fileName}")
data="{\"query_type\": \"signedimagesurl\", \"query_method\": \"GET\", \"values\": {\"uid\": \"${uid}\", \"payload\": null}}"
# echo "Get signed url request: $data"

signedUrlRes=$(curl -X 'POST' \
 -H 'Content-Type: application/json' \
 -H 'Accept: application/json' \
 -H "Authorization: ${datasetKEY}" \
 -H "ResourceID: ${datasetID}" \
 -d "$data" \
 'https://api.encord.com/public')

# echo "Get signed url response: $signedUrlRes"

# jq -r prints the URL as a raw string, without the surrounding JSON quotes.
signedURL=$(echo "${signedUrlRes}" | jq -r '.response[0].signed_url')
# echo "Signed URL: $signedURL"


# === UPLOAD IMAGE === #
# The URL is quoted so characters such as '?' and '&' are passed to curl unchanged.
curl -X 'PUT' "${signedURL}" -H 'Content-Type: application/octet-stream' --data-binary "@${fileName}"

# === ATTACH IMAGE TO DATASET === #
# dataHash and payload stay JSON-encoded (no -r) because they are spliced into the JSON body below.
dataHash=$(echo "${signedUrlRes}" | jq '.response[0].data_hash')
payload=$(echo "${signedUrlRes}" | jq '.response[0]')
# echo "Data hash: $dataHash"
# echo "Payload: $payload"

data="{\"query_type\": \"singleimage\", \"query_method\": \"POST\", \"values\": {\"uid\": ${dataHash}, \"payload\": $payload}}"

attachRes=$(curl -X 'POST'  \
   -H 'Content-Type: application/json' \
   -H 'Accept: application/json' \
   -H "Authorization: ${datasetKEY}" \
   -H "ResourceID: ${datasetID}" \
   -d "$data" \
   'https://api.encord.com/public')

success=$(echo "${attachRes}" | jq '.response.success')

if [ "$success" = "true" ]; then
  echo "Successfully attached image to dataset."
  echo "You will now have to attach the dataHash: ${dataHash} to the project."
  echo "Please refer to the documentation for the next step"
else
  # Report the raw reply so a failed attach is not silent.
  echo "Failed to attach image to dataset. Response: ${attachRes}" >&2
fi

Uploading image groups

Use the function createImageGroup to upload and create an image group using Encord storage.

var axios = require('axios');
var fs = require('fs');
var path = require('path');

/**
 * Uploads a list of local images and creates an image group from them.
 *
 * Requests signed URLs for the files' base names, uploads the files through
 * `uploadToSignedUrlList`, then calls `createImageGroupApiCall` with the
 * resulting data hashes.
 *
 * @param {string[]} filePaths - Paths of the image files to upload.
 * @param {string} datasetId - Dataset ID, forwarded to the request helpers.
 * @param {string} datasetApiKey - Dataset API key, forwarded to the request helpers.
 * @returns {Promise<*>} Whatever `createImageGroupApiCall` resolves to; rejects if any step fails.
 */
const createImageGroup = async (filePaths, datasetId, datasetApiKey) => {
    const shortNames = filePaths.map((filePath) => path.basename(filePath));

    const signedImagesReply = await signedImagesUrl(shortNames, datasetId, datasetApiKey);

    const dataHashes = await uploadToSignedUrlList(
        filePaths,
        signedImagesReply.response,
        datasetId,
        datasetApiKey
    );

    // Awaited and returned so the caller can observe completion, the reply and any failure.
    return await createImageGroupApiCall(dataHashes, datasetId, datasetApiKey);
};

/**
 * Requests signed upload URL data for a list of image names.
 *
 * @param {string[]} shortNames - Image file names, sent as the uid.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
const signedImagesUrl = async (shortNames, datasetId, datasetApiKey) => {
    const requestBody = JSON.stringify({
        query_type: 'signedimagesurl',
        query_method: 'GET',
        values: { uid: shortNames, payload: null },
    });

    const { data: responseBody } = await axios({
        method: 'post',
        url: 'https://api.encord.com/public',
        headers: {
            'Content-Type': 'application/json',
            ResourceID: datasetId,
            Authorization: datasetApiKey,
            Accept: 'application/json',
        },
        data: requestBody,
    });

    return responseBody;
};


/**
 * Uploads each file to the signed URL at the same index and sends an `image`
 * PUT query for it, collecting the data hashes returned by the API.
 *
 * A file is only uploaded when its base name equals the `title` of the
 * signed-URL entry at the same index; other files are skipped.
 *
 * @param {string[]} filePaths - Paths of the image files, in the same order as `signedUrls`.
 * @param {object[]} signedUrls - Signed-URL entries; `signed_url`, `title` and `data_hash` are read from each.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<Array>} The `response.data_hash` of each API reply, in upload order.
 */
const uploadToSignedUrlList = async (filePaths, signedUrls, datasetId, datasetApiKey) => {
    const uploadedHashes = [];

    for (const [position, localPath] of filePaths.entries()) {
        const localName = path.basename(localPath);

        const urlEntry = signedUrls[position];
        const { signed_url, title, data_hash } = urlEntry;

        const fileContents = fs.readFileSync(localPath);

        // Skip files that do not line up with the signed-URL entry at this index.
        if (localName !== title) {
            continue;
        }

        await axios({
            method: 'put',
            url: signed_url,
            headers: {
                'Content-Type': 'application/octet-stream',
            },
            data: fileContents,
            maxContentLength: Infinity,
            maxBodyLength: Infinity,
        });

        const registerBody = JSON.stringify({
            query_type: 'image',
            query_method: 'PUT',
            values: { uid: data_hash, payload: urlEntry },
        });

        const registerReply = await axios({
            method: 'post',
            url: 'https://api.encord.com/public',
            headers: {
                'Content-Type': 'application/json',
                ResourceID: datasetId,
                Authorization: datasetApiKey,
                Accept: 'application/json',
            },
            data: registerBody,
        });

        uploadedHashes.push(registerReply.data.response.data_hash);
    }

    return uploadedHashes;
};

/**
 * Sends the `imagegroup` POST query that creates an image group from data hashes.
 *
 * @param {Array} dataHashes - Data hashes of the uploaded images, sent as the uid.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
const createImageGroupApiCall = async (dataHashes, datasetId, datasetApiKey) => {
    const requestBody = JSON.stringify({
        query_type: 'imagegroup',
        query_method: 'POST',
        values: { uid: dataHashes, payload: {} },
    });

    const { data: responseBody } = await axios({
        method: 'post',
        url: 'https://api.encord.com/public',
        headers: {
            'Content-Type': 'application/json',
            ResourceID: datasetId,
            Authorization: datasetApiKey,
            Accept: 'application/json',
        },
        data: requestBody,
    });

    return responseBody;
};

The following code uploads an image group consisting of three images.

// Example invocation: replace the dataset ID, API key and file paths with your own values.
const datasetId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee';
const datasetApiKey = 'lCuoabcdefabcdefabcdefabcdefabcdefabc-jlan8';

// The three images that will make up the image group.
const imagePaths = [
    '/Users/name/Desktop/Image_Group_Folder/image_one.jpeg',
    '/Users/name/Desktop/Image_Group_Folder/image_two.jpeg',
    '/Users/name/Desktop/Image_Group_Folder/image_three.jpg',
];

createImageGroup(imagePaths, datasetId, datasetApiKey);

Adding data from private cloud

  1. Use the API to retrieve a list of available Cloud Integrations.

var axios = require('axios');
// Request body for listing the available cloud integrations.
const requestBody = JSON.stringify({
    query_type: 'cloudintegration',
    query_method: 'GET',
    values: { uid: null, payload: null },
});

// Replace <dataset_id> and <dataset_api_key> before running.
const requestConfig = {
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: {
        'Content-Type': 'application/json',
        ResourceID: '<dataset_id>',
        Authorization: '<dataset_api_key>',
        Accept: 'application/json',
    },
    data: requestBody,
};

axios(requestConfig)
    .then((response) => {
        console.log(JSON.stringify(response.data));
    })
    .catch((error) => {
        console.log(error);
    });


# List the available cloud integrations.
# Replace <dataset_id> and <dataset_api_key> before running.
curl -L -X POST 'https://api.encord.com/public' \
  -H 'Content-Type: application/json' \
  -H 'ResourceID: <dataset_id>' \
  -H 'Authorization: <dataset_api_key>' \
  -H 'Accept: application/json' \
  --data-raw '{
        "query_type": "cloudintegration",
        "query_method":"GET",
        "values": {
            "uid": null,
            "payload": null
        }
    }'

  2. Grab the id from the integration of your choice and call the API to add the data as a JSON file in the format specified by the private cloud section of the datasets documentation.

var axios = require('axios');
var fs = require('fs');
// The package is named 'form-data'.
// NOTE(review): formData is not referenced anywhere in this snippet; confirm whether this import is still needed.
var formData = require('form-data');

// Load the JSON file describing the private-cloud files to add.
const privateCloudJsonFile = JSON.parse(fs.readFileSync('<Path to your JSON>'));

// Replace the <...> placeholders before running.
const requestBody = JSON.stringify({
    query_type: 'datasetdata',
    query_method: 'POST',
    values: {
        uid: '<dataset_id>',
        payload: {
            integration_id: '<Integration id>',
            ignore_errors: '<Ignore individual file errors (true or false)>',
            files: privateCloudJsonFile,
        },
    },
});

const requestConfig = {
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: {
        'Content-Type': 'application/json',
        ResourceID: '<dataset_id>',
        Authorization: '<dataset_api_key>',
        Accept: 'application/json',
    },
    data: requestBody,
};

axios(requestConfig)
    .then((response) => {
        console.log(JSON.stringify(response.data));
    })
    .catch((error) => {
        console.log(error);
    });

Deleting data from a dataset

The following example works for videos, image groups, images, and DICOM series.

var axios = require('axios');
// Request body for deleting data units; list the data hashes to remove in `uid`.
const requestBody = JSON.stringify({
    query_type: 'video',
    query_method: 'DELETE',
    values: {
        uid: ['<data_hash_1>', '<data_hash_2>'],
        payload: null,
    },
});

// Replace <dataset_id> and <dataset_api_key> before running.
const requestConfig = {
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: {
        'Content-Type': 'application/json',
        ResourceID: '<dataset_id>',
        Authorization: '<dataset_api_key>',
        Accept: 'application/json',
    },
    data: requestBody,
};

axios(requestConfig)
    .then((response) => {
        console.log(JSON.stringify(response.data));
    })
    .catch((error) => {
        console.log(error);
    });

# Delete data units from a dataset.
# Replace <dataset_id>, <dataset_api_key> and the <data_hash_N> values before running.
curl -L -X POST 'https://api.encord.com/public' \
  -H 'Content-Type: application/json' \
  -H 'ResourceID: <dataset_id>' \
  -H 'Authorization: <dataset_api_key>' \
  -H 'Accept: application/json' \
  --data-raw '{
        "query_type": "video",
        "query_method":"DELETE",
        "values": {
            "uid": ["<data_hash_1>","<data_hash_2>"],
            "payload": null
        }
    }'