Close httplib2 connections.
upload(parent, body=None, media_body=None, media_mime_type=None, x__xgafv=None)
Upload a file into a RagCorpus.
close()
Close httplib2 connections.
upload(parent, body=None, media_body=None, media_mime_type=None, x__xgafv=None)
Upload a file into a RagCorpus.
Args:
parent: string, Required. The name of the RagCorpus resource into which to upload the file. Format: `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}` (required)
body: object, The request body.
The object takes the form of:
{ # Request message for VertexRagDataService.UploadRagFile.
"ragFile": { # A RagFile contains user data for chunking, embedding and indexing. # Required. The RagFile to upload.
"createTime": "A String", # Output only. Timestamp when this RagFile was created.
"description": "A String", # Optional. The description of the RagFile.
"directUploadSource": { # The input content is encapsulated and uploaded in the request. # Output only. The RagFile is encapsulated and uploaded in the UploadRagFile request.
},
"displayName": "A String", # Required. The display name of the RagFile. The name can be up to 128 characters long and can consist of any UTF-8 characters.
"fileStatus": { # RagFile status. # Output only. State of the RagFile.
"errorStatus": "A String", # Output only. Only when the `state` field is ERROR.
"state": "A String", # Output only. RagFile state.
},
"gcsSource": { # The Google Cloud Storage location for the input content. # Output only. Google Cloud Storage location of the RagFile. It does not support wildcards in the Cloud Storage uri for now.
"uris": [ # Required. Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/wildcards.
"A String",
],
},
"googleDriveSource": { # The Google Drive location for the input content. # Output only. Google Drive location. Supports importing individual files as well as Google Drive folders.
"resourceIds": [ # Required. Google Drive resource IDs.
{ # The type and ID of the Google Drive resource.
"resourceId": "A String", # Required. The ID of the Google Drive resource.
"resourceType": "A String", # Required. The type of the Google Drive resource.
},
],
},
"jiraSource": { # The Jira source for the ImportRagFilesRequest. # The RagFile is imported from a Jira query.
"jiraQueries": [ # Required. The Jira queries.
{ # JiraQueries contains the Jira queries and corresponding authentication.
"apiKeyConfig": { # The API secret. # Required. The SecretManager secret version resource name (e.g. projects/{project}/secrets/{secret}/versions/{version}) storing the Jira API key. See [Manage API tokens for your Atlassian account](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/).
"apiKeySecretVersion": "A String", # Required. The SecretManager secret version resource name storing API key. e.g. projects/{project}/secrets/{secret}/versions/{version}
"apiKeyString": "A String", # The API key string. Either this or `api_key_secret_version` must be set.
},
"customQueries": [ # A list of custom Jira queries to import. For information about JQL (Jira Query Language), see https://support.atlassian.com/jira-service-management-cloud/docs/use-advanced-search-with-jira-query-language-jql/
"A String",
],
"email": "A String", # Required. The Jira email address.
"projects": [ # A list of Jira projects to import in their entirety.
"A String",
],
"serverUri": "A String", # Required. The Jira server URI.
},
],
},
"name": "A String", # Output only. The resource name of the RagFile.
"ragFileType": "A String", # Output only. The type of the RagFile.
"sharePointSources": { # The SharePointSources to pass to ImportRagFiles. # The RagFile is imported from a SharePoint source.
"sharePointSources": [ # The SharePoint sources.
{ # An individual SharePointSource.
"clientId": "A String", # The Application ID for the app registered in Microsoft Azure Portal. The application must also be configured with MS Graph permissions "Files.ReadAll", "Sites.ReadAll" and BrowserSiteLists.Read.All.
"clientSecret": { # The API secret. # The application secret for the app registered in Azure.
"apiKeySecretVersion": "A String", # Required. The SecretManager secret version resource name storing API key. e.g. projects/{project}/secrets/{secret}/versions/{version}
"apiKeyString": "A String", # The API key string. Either this or `api_key_secret_version` must be set.
},
"driveId": "A String", # The ID of the drive to download from.
"driveName": "A String", # The name of the drive to download from.
"fileId": "A String", # Output only. The SharePoint file id. Output only.
"sharepointFolderId": "A String", # The ID of the SharePoint folder to download from.
"sharepointFolderPath": "A String", # The path of the SharePoint folder to download from.
"sharepointSiteName": "A String", # The name of the SharePoint site to download from. This can be the site name or the site id.
"tenantId": "A String", # Unique identifier of the Azure Active Directory Instance.
},
],
},
"sizeBytes": "A String", # Output only. The size of the RagFile in bytes.
"slackSource": { # The Slack source for the ImportRagFilesRequest. # The RagFile is imported from a Slack channel.
"channels": [ # Required. The Slack channels.
{ # SlackChannels contains the Slack channels and corresponding access token.
"apiKeyConfig": { # The API secret. # Required. The SecretManager secret version resource name (e.g. projects/{project}/secrets/{secret}/versions/{version}) storing the Slack channel access token that has access to the slack channel IDs. See: https://api.slack.com/tutorials/tracks/getting-a-token.
"apiKeySecretVersion": "A String", # Required. The SecretManager secret version resource name storing API key. e.g. projects/{project}/secrets/{secret}/versions/{version}
"apiKeyString": "A String", # The API key string. Either this or `api_key_secret_version` must be set.
},
"channels": [ # Required. The Slack channel IDs.
{ # SlackChannel contains the Slack channel ID and the time range to import.
"channelId": "A String", # Required. The Slack channel ID.
"endTime": "A String", # Optional. The ending timestamp for messages to import.
"startTime": "A String", # Optional. The starting timestamp for messages to import.
},
],
},
],
},
"updateTime": "A String", # Output only. Timestamp when this RagFile was last updated.
"userMetadata": "A String", # Output only. The metadata for metadata search. The user_metadata Needs to be in JSON format.
},
"uploadRagFileConfig": { # Config for uploading RagFile. # Required. The config for the RagFiles to be uploaded into the RagCorpus. VertexRagDataService.UploadRagFile.
"ragFileChunkingConfig": { # Specifies the size and overlap of chunks for RagFiles. # Specifies the size and overlap of chunks after uploading RagFile.
"chunkOverlap": 42, # The overlap between chunks.
"chunkSize": 42, # The size of the chunks.
"fixedLengthChunking": { # Specifies the fixed length chunking config. # Specifies the fixed length chunking config.
"chunkOverlap": 42, # The overlap between chunks.
"chunkSize": 42, # The size of the chunks.
},
},
"ragFileMetadataConfig": { # Metadata config for RagFile. # Specifies the metadata config for RagFiles. Including paths for metadata schema and metadata. Alteratively, inline metadata schema and metadata can be provided.
"gcsMetadataSchemaSource": { # The Google Cloud Storage location for the input content. # Google Cloud Storage location. Supports importing individual files as well as entire Google Cloud Storage directories. Sample formats: - `gs://bucket_name/my_directory/object_name/metadata_schema.json` - `gs://bucket_name/my_directory` If the user provides a directory, the metadata schema will be read from the files that ends with "metadata_schema.json" in the directory.
"uris": [ # Required. Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/wildcards.
"A String",
],
},
"gcsMetadataSource": { # The Google Cloud Storage location for the input content. # Google Cloud Storage location. Supports importing individual files as well as entire Google Cloud Storage directories. Sample formats: - `gs://bucket_name/my_directory/object_name/metadata.json` - `gs://bucket_name/my_directory` If the user provides a directory, the metadata will be read from the files that ends with "metadata.json" in the directory.
"uris": [ # Required. Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/wildcards.
"A String",
],
},
"googleDriveMetadataSchemaSource": { # The Google Drive location for the input content. # Google Drive location. Supports importing individual files as well as Google Drive folders. If the user provides a folder, the metadata schema will be read from the files that ends with "metadata_schema.json" in the directory.
"resourceIds": [ # Required. Google Drive resource IDs.
{ # The type and ID of the Google Drive resource.
"resourceId": "A String", # Required. The ID of the Google Drive resource.
"resourceType": "A String", # Required. The type of the Google Drive resource.
},
],
},
"googleDriveMetadataSource": { # The Google Drive location for the input content. # Google Drive location. Supports importing individual files as well as Google Drive folders. If the user provides a directory, the metadata will be read from the files that ends with "metadata.json" in the directory.
"resourceIds": [ # Required. Google Drive resource IDs.
{ # The type and ID of the Google Drive resource.
"resourceId": "A String", # Required. The ID of the Google Drive resource.
"resourceType": "A String", # Required. The type of the Google Drive resource.
},
],
},
"inlineMetadataSchemaSource": "A String", # Inline metadata schema source. Must be a JSON string.
"inlineMetadataSource": "A String", # Inline metadata source. Must be a JSON string.
},
"ragFileParsingConfig": { # Specifies the parsing config for RagFiles. # Optional. Specifies the parsing config for RagFiles. RAG will use the default parser if this field is not set.
"advancedParser": { # Specifies the advanced parsing for RagFiles. # The Advanced Parser to use for RagFiles.
"useAdvancedPdfParsing": True or False, # Whether to use advanced PDF parsing.
},
"layoutParser": { # Document AI Layout Parser config. # The Layout Parser to use for RagFiles.
"globalMaxParsingRequestsPerMin": 42, # The maximum number of requests the job is allowed to make to the Document AI processor per minute in this project. Consult https://cloud.google.com/document-ai/quotas and the Quota page for your project to set an appropriate value here. If this value is not specified, max_parsing_requests_per_min will be used by indexing pipeline as the global limit.
"maxParsingRequestsPerMin": 42, # The maximum number of requests the job is allowed to make to the Document AI processor per minute. Consult https://cloud.google.com/document-ai/quotas and the Quota page for your project to set an appropriate value here. If unspecified, a default value of 120 QPM would be used.
"processorName": "A String", # The full resource name of a Document AI processor or processor version. The processor must have type `LAYOUT_PARSER_PROCESSOR`. If specified, the `additional_config.parse_as_scanned_pdf` field must be false. Format: * `projects/{project_id}/locations/{location}/processors/{processor_id}` * `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`
},
"llmParser": { # Specifies the LLM parsing for RagFiles. # The LLM Parser to use for RagFiles.
"customParsingPrompt": "A String", # The prompt to use for parsing. If not specified, a default prompt will be used.
"globalMaxParsingRequestsPerMin": 42, # The maximum number of requests the job is allowed to make to the LLM model per minute in this project. Consult https://cloud.google.com/vertex-ai/generative-ai/docs/quotas and your document size to set an appropriate value here. If this value is not specified, max_parsing_requests_per_min will be used by indexing pipeline job as the global limit.
"maxParsingRequestsPerMin": 42, # The maximum number of requests the job is allowed to make to the LLM model per minute. Consult https://cloud.google.com/vertex-ai/generative-ai/docs/quotas and your document size to set an appropriate value here. If unspecified, a default value of 5000 QPM would be used.
"modelName": "A String", # The name of a LLM model used for parsing. Format: * `projects/{project_id}/locations/{location}/publishers/{publisher}/models/{model}`
},
"useAdvancedPdfParsing": True or False, # Whether to use advanced PDF parsing.
},
"ragFileTransformationConfig": { # Specifies the transformation config for RagFiles. # Specifies the transformation config for RagFiles.
"ragFileChunkingConfig": { # Specifies the size and overlap of chunks for RagFiles. # Specifies the chunking config for RagFiles.
"chunkOverlap": 42, # The overlap between chunks.
"chunkSize": 42, # The size of the chunks.
"fixedLengthChunking": { # Specifies the fixed length chunking config. # Specifies the fixed length chunking config.
"chunkOverlap": 42, # The overlap between chunks.
"chunkSize": 42, # The size of the chunks.
},
},
},
},
}
media_body: string, The filename of the media request body, or an instance of a MediaUpload object.
media_mime_type: string, The MIME type of the media request body, or an instance of a MediaUpload object.
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
Returns:
An object of the form:
{ # Response message for VertexRagDataService.UploadRagFile.
"error": { # The `Status` type defines a logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by [gRPC](https://github.com/grpc). Each `Status` message contains three pieces of data: error code, error message, and error details. You can find out more about this error model and how to work with it in the [API Design Guide](https://cloud.google.com/apis/design/errors). # The error that occurred while processing the RagFile.
"code": 42, # The status code, which should be an enum value of google.rpc.Code.
"details": [ # A list of messages that carry the error details. There is a common set of message types for APIs to use.
{
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
],
"message": "A String", # A developer-facing error message, which should be in English. Any user-facing error message should be localized and sent in the google.rpc.Status.details field, or localized by the client.
},
"ragFile": { # A RagFile contains user data for chunking, embedding and indexing. # The RagFile that had been uploaded into the RagCorpus.
"createTime": "A String", # Output only. Timestamp when this RagFile was created.
"description": "A String", # Optional. The description of the RagFile.
"directUploadSource": { # The input content is encapsulated and uploaded in the request. # Output only. The RagFile is encapsulated and uploaded in the UploadRagFile request.
},
"displayName": "A String", # Required. The display name of the RagFile. The name can be up to 128 characters long and can consist of any UTF-8 characters.
"fileStatus": { # RagFile status. # Output only. State of the RagFile.
"errorStatus": "A String", # Output only. Only when the `state` field is ERROR.
"state": "A String", # Output only. RagFile state.
},
"gcsSource": { # The Google Cloud Storage location for the input content. # Output only. Google Cloud Storage location of the RagFile. It does not support wildcards in the Cloud Storage uri for now.
"uris": [ # Required. Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/wildcards.
"A String",
],
},
"googleDriveSource": { # The Google Drive location for the input content. # Output only. Google Drive location. Supports importing individual files as well as Google Drive folders.
"resourceIds": [ # Required. Google Drive resource IDs.
{ # The type and ID of the Google Drive resource.
"resourceId": "A String", # Required. The ID of the Google Drive resource.
"resourceType": "A String", # Required. The type of the Google Drive resource.
},
],
},
"jiraSource": { # The Jira source for the ImportRagFilesRequest. # The RagFile is imported from a Jira query.
"jiraQueries": [ # Required. The Jira queries.
{ # JiraQueries contains the Jira queries and corresponding authentication.
"apiKeyConfig": { # The API secret. # Required. The SecretManager secret version resource name (e.g. projects/{project}/secrets/{secret}/versions/{version}) storing the Jira API key. See [Manage API tokens for your Atlassian account](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/).
"apiKeySecretVersion": "A String", # Required. The SecretManager secret version resource name storing API key. e.g. projects/{project}/secrets/{secret}/versions/{version}
"apiKeyString": "A String", # The API key string. Either this or `api_key_secret_version` must be set.
},
"customQueries": [ # A list of custom Jira queries to import. For information about JQL (Jira Query Language), see https://support.atlassian.com/jira-service-management-cloud/docs/use-advanced-search-with-jira-query-language-jql/
"A String",
],
"email": "A String", # Required. The Jira email address.
"projects": [ # A list of Jira projects to import in their entirety.
"A String",
],
"serverUri": "A String", # Required. The Jira server URI.
},
],
},
"name": "A String", # Output only. The resource name of the RagFile.
"ragFileType": "A String", # Output only. The type of the RagFile.
"sharePointSources": { # The SharePointSources to pass to ImportRagFiles. # The RagFile is imported from a SharePoint source.
"sharePointSources": [ # The SharePoint sources.
{ # An individual SharePointSource.
"clientId": "A String", # The Application ID for the app registered in Microsoft Azure Portal. The application must also be configured with MS Graph permissions "Files.ReadAll", "Sites.ReadAll" and BrowserSiteLists.Read.All.
"clientSecret": { # The API secret. # The application secret for the app registered in Azure.
"apiKeySecretVersion": "A String", # Required. The SecretManager secret version resource name storing API key. e.g. projects/{project}/secrets/{secret}/versions/{version}
"apiKeyString": "A String", # The API key string. Either this or `api_key_secret_version` must be set.
},
"driveId": "A String", # The ID of the drive to download from.
"driveName": "A String", # The name of the drive to download from.
"fileId": "A String", # Output only. The SharePoint file id. Output only.
"sharepointFolderId": "A String", # The ID of the SharePoint folder to download from.
"sharepointFolderPath": "A String", # The path of the SharePoint folder to download from.
"sharepointSiteName": "A String", # The name of the SharePoint site to download from. This can be the site name or the site id.
"tenantId": "A String", # Unique identifier of the Azure Active Directory Instance.
},
],
},
"sizeBytes": "A String", # Output only. The size of the RagFile in bytes.
"slackSource": { # The Slack source for the ImportRagFilesRequest. # The RagFile is imported from a Slack channel.
"channels": [ # Required. The Slack channels.
{ # SlackChannels contains the Slack channels and corresponding access token.
"apiKeyConfig": { # The API secret. # Required. The SecretManager secret version resource name (e.g. projects/{project}/secrets/{secret}/versions/{version}) storing the Slack channel access token that has access to the slack channel IDs. See: https://api.slack.com/tutorials/tracks/getting-a-token.
"apiKeySecretVersion": "A String", # Required. The SecretManager secret version resource name storing API key. e.g. projects/{project}/secrets/{secret}/versions/{version}
"apiKeyString": "A String", # The API key string. Either this or `api_key_secret_version` must be set.
},
"channels": [ # Required. The Slack channel IDs.
{ # SlackChannel contains the Slack channel ID and the time range to import.
"channelId": "A String", # Required. The Slack channel ID.
"endTime": "A String", # Optional. The ending timestamp for messages to import.
"startTime": "A String", # Optional. The starting timestamp for messages to import.
},
],
},
],
},
"updateTime": "A String", # Output only. Timestamp when this RagFile was last updated.
"userMetadata": "A String", # Output only. The metadata for metadata search. The user_metadata Needs to be in JSON format.
},
}